From a6b529bb4f9b5e31112bae991df3410a05c55d58 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 12 Oct 2019 22:21:14 -0700 Subject: [PATCH 1/5] [SPARK-26321][SQL] Port HIVE-15297: Hive should not split semicolon within quoted string literals This pr port [HIVE-15297](https://issues.apache.org/jira/browse/HIVE-15297) to fix **spark-sql** should not split semicolon within quoted string literals. unit tests and manual tests: ![image](https://user-images.githubusercontent.com/5399861/60395592-5666ea00-9b68-11e9-99dc-0e8ea98de32b.png) Closes #25018 from wangyum/SPARK-26321. Authored-by: Yuming Wang Signed-off-by: Yuming Wang --- .../hive/thriftserver/SparkSQLCLIDriver.scala | 110 +++++++++++++++++- .../sql/hive/thriftserver/CliSuite.scala | 9 ++ 2 files changed, 118 insertions(+), 1 deletion(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index bb96cea2b0ae1..0c133252d82aa 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.hive.thriftserver import java.io._ -import java.util.{ArrayList => JArrayList, Locale} +import java.util.{ArrayList => JArrayList, List => JList, Locale} import scala.collection.JavaConverters._ @@ -37,6 +37,7 @@ import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.security.{Credentials, UserGroupInformation} import org.apache.log4j.Level import org.apache.thrift.transport.TSocket +import sun.misc.{Signal, SignalHandler} import org.apache.spark.SparkConf import org.apache.spark.deploy.SparkHadoopUtil @@ -434,5 +435,112 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { ret } } + + // Adapted processLine from Hive 2.3's CliDriver.processLine. + override def processLine(line: String, allowInterrupting: Boolean): Int = { + var oldSignal: SignalHandler = null + var interruptSignal: Signal = null + + if (allowInterrupting) { + // Remember all threads that were running at the time we started line processing. + // Hook up the custom Ctrl+C handler while processing this line + interruptSignal = new Signal("INT") + oldSignal = Signal.handle(interruptSignal, new SignalHandler() { + private var interruptRequested: Boolean = false + + override def handle(signal: Signal) { + val initialRequest = !interruptRequested + interruptRequested = true + + // Kill the VM on second ctrl+c + if (!initialRequest) { + console.printInfo("Exiting the JVM") + System.exit(127) + } + + // Interrupt the CLI thread to stop the current statement and return + // to prompt + console.printInfo("Interrupting... Be patient, this might take some time.") + console.printInfo("Press Ctrl+C again to kill JVM") + + HiveInterruptUtils.interrupt() + } + }) + } + + try { + var lastRet: Int = 0 + + // we can not use "split" function directly as ";" may be quoted + val commands = splitSemiColon(line).asScala + var command: String = "" + for (oneCmd <- commands) { + if (StringUtils.endsWith(oneCmd, "\\")) { + command += StringUtils.chop(oneCmd) + ";" + } else { + command += oneCmd + if (!StringUtils.isBlank(command)) { + val ret = processCmd(command) + command = "" + lastRet = ret + val ignoreErrors = HiveConf.getBoolVar(conf, HiveConf.ConfVars.CLIIGNOREERRORS) + if (ret != 0 && !ignoreErrors) { + CommandProcessorFactory.clean(conf.asInstanceOf[HiveConf]) + ret + } + } + } + } + CommandProcessorFactory.clean(conf.asInstanceOf[HiveConf]) + lastRet + } finally { + // Once we are done processing the line, restore the old handler + if (oldSignal != null && interruptSignal != null) { + Signal.handle(interruptSignal, oldSignal) + } + } + } + + // Adapted splitSemiColon from Hive 2.3's CliDriver.splitSemiColon. + private def splitSemiColon(line: String): JList[String] = { + var insideSingleQuote = false + var insideDoubleQuote = false + var escape = false + var beginIndex = 0 + val ret = new JArrayList[String] + for (index <- 0 until line.length) { + if (line.charAt(index) == '\'') { + // take a look to see if it is escaped + if (!escape) { + // flip the boolean variable + insideSingleQuote = !insideSingleQuote + } + } else if (line.charAt(index) == '\"') { + // take a look to see if it is escaped + if (!escape) { + // flip the boolean variable + insideDoubleQuote = !insideDoubleQuote + } + } else if (line.charAt(index) == ';') { + if (insideSingleQuote || insideDoubleQuote) { + // do not split + } else { + // split, do not include ; itself + ret.add(line.substring(beginIndex, index)) + beginIndex = index + 1 + } + } else { + // nothing to do + } + // set the escape + if (escape) { + escape = false + } else if (line.charAt(index) == '\\') { + escape = true + } + } + ret.add(line.substring(beginIndex)) + ret + } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 933fd7369380a..f35e45319daa7 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -296,4 +296,13 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { "set spark.sql.warehouse.dir;" -> tmpDir.getAbsolutePath) tmpDir.delete() } + + test("SPARK-26321 Should not split semicolon within quoted string literals") { + runCliWithin(3.minute)( + """select 'Test1', "^;^";""" -> "Test1\t^;^", + """select 'Test2', "\";";""" -> "Test2\t\";", + """select 'Test3', "\';";""" -> "Test3\t';", + "select concat('Test4', ';');" -> "Test4;" + ) + } } From 7cbdeb7446e102c550e2ae71c6d506d367b72666 Mon Sep 17 00:00:00 2001 From: Javier Date: Tue, 3 Mar 2020 09:55:15 -0600 Subject: [PATCH 2/5] [SPARK-30049][SQL] SQL fails to parse when comment contains an unmatched quote character A SQL statement that contains a comment with an unmatched quote character can lead to a parse error: - Added a insideComment flag in the splitter method to avoid checking single and double quotes within a comment: ``` spark-sql> SELECT 1 -- someone's comment here > ; Error in query: extraneous input ';' expecting (line 2, pos 0) == SQL == SELECT 1 -- someone's comment here ; ^^^ ``` This misbehaviour was not present on previous spark versions. - No - New tests were added. Closes #27321 from javierivanov/SPARK-30049B. Lead-authored-by: Javier Co-authored-by: Javier Fuentes Signed-off-by: Thomas Graves --- .../hive/thriftserver/SparkSQLCLIDriver.scala | 24 +++++++++++++++---- .../sql/hive/thriftserver/CliSuite.scala | 22 +++++++++++++++++ 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 0c133252d82aa..44227ed0871e3 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -505,24 +505,40 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { private def splitSemiColon(line: String): JList[String] = { var insideSingleQuote = false var insideDoubleQuote = false + var insideComment = false var escape = false var beginIndex = 0 + var endIndex = line.length val ret = new JArrayList[String] + for (index <- 0 until line.length) { - if (line.charAt(index) == '\'') { + if (line.charAt(index) == '\'' && !insideComment) { // take a look to see if it is escaped if (!escape) { // flip the boolean variable insideSingleQuote = !insideSingleQuote } - } else if (line.charAt(index) == '\"') { + } else if (line.charAt(index) == '\"' && !insideComment) { // take a look to see if it is escaped if (!escape) { // flip the boolean variable insideDoubleQuote = !insideDoubleQuote } + } else if (line.charAt(index) == '-') { + val hasNext = index + 1 < line.length + if (insideDoubleQuote || insideSingleQuote || insideComment) { + // Ignores '-' in any case of quotes or comment. + // Avoids to start a comment(--) within a quoted segment or already in a comment. + // Sample query: select "quoted value --" + // ^^ avoids starting a comment if it's inside quotes. + } else if (hasNext && line.charAt(index + 1) == '-') { + // ignore quotes and ; + insideComment = true + // ignore eol + endIndex = index + } } else if (line.charAt(index) == ';') { - if (insideSingleQuote || insideDoubleQuote) { + if (insideSingleQuote || insideDoubleQuote || insideComment) { // do not split } else { // split, do not include ; itself @@ -539,7 +555,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { escape = true } } - ret.add(line.substring(beginIndex)) + ret.add(line.substring(beginIndex, endIndex)) ret } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index f35e45319daa7..5ae1a894af24f 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -305,4 +305,26 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { "select concat('Test4', ';');" -> "Test4;" ) } + + test("SPARK-30049 Should not complain for quotes in commented lines") { + runCliWithin(1.minute)( + """SELECT concat('test', 'comment') -- someone's comment here + |;""".stripMargin -> "testcomment" + ) + } + + test("SPARK-30049 Should not complain for quotes in commented with multi-lines") { + runCliWithin(1.minute)( + """SELECT concat('test', 'comment') -- someone's comment here \\ + | comment continues here with single ' quote \\ + | extra ' \\ + |;""".stripMargin -> "testcomment" + ) + runCliWithin(1.minute)( + """SELECT concat('test', 'comment') -- someone's comment here \\ + | comment continues here with single ' quote \\ + | extra ' \\ + | ;""".stripMargin -> "testcomment" + ) + } } From bda05e86b3f51df913d8b601319446186ae5605e Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Wed, 6 May 2020 04:34:43 +0000 Subject: [PATCH 3/5] [SPARK-31595][SQL] Spark sql should allow unescaped quote mark in quoted string ### What changes were proposed in this pull request? `def splitSemiColon` cannot handle unescaped quote mark like "'" or '"' correctly. When there are unmatched quotes in a string, `splitSemiColon` will not drop off semicolon as expected. ### Why are the changes needed? Some regex expression will use quote mark in string. We should process semicolon correctly. ### Does this PR introduce any user-facing change? No ### How was this patch tested? Added Unit test and also manual test. Closes #28393 from adrian-wang/unescaped. Authored-by: Daoyuan Wang Signed-off-by: Wenchen Fan --- .../spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala | 9 +++++++-- .../apache/spark/sql/hive/thriftserver/CliSuite.scala | 9 +++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 44227ed0871e3..fdc4e2482f73c 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -502,6 +502,9 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { } // Adapted splitSemiColon from Hive 2.3's CliDriver.splitSemiColon. + // Note: [SPARK-31595] if there is a `'` in a double quoted string, or a `"` in a single quoted + // string, the origin implementation from Hive will not drop the trailing semicolon as expected, + // hence we refined this function a little bit. private def splitSemiColon(line: String): JList[String] = { var insideSingleQuote = false var insideDoubleQuote = false @@ -514,13 +517,15 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { for (index <- 0 until line.length) { if (line.charAt(index) == '\'' && !insideComment) { // take a look to see if it is escaped - if (!escape) { + // See the comment above about SPARK-31595 + if (!escape && !insideDoubleQuote) { // flip the boolean variable insideSingleQuote = !insideSingleQuote } } else if (line.charAt(index) == '\"' && !insideComment) { // take a look to see if it is escaped - if (!escape) { + // See the comment above about SPARK-31595 + if (!escape && !insideSingleQuote) { // flip the boolean variable insideDoubleQuote = !insideDoubleQuote } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 5ae1a894af24f..06f9fda292fe5 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -327,4 +327,13 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { | ;""".stripMargin -> "testcomment" ) } + + test("SPARK-31595 Should allow unescaped quote mark in quoted string") { + runCliWithin(1.minute)( + "SELECT '\"legal string a';select 1 + 234;".stripMargin -> "235" + ) + runCliWithin(1.minute)( + "SELECT \"legal 'string b\";select 22222 + 1;".stripMargin -> "22223" + ) + } } From c55c21bb01b7735c8b1e5235cb36270ee74cb6e7 Mon Sep 17 00:00:00 2001 From: Javier Fuentes Date: Tue, 12 May 2020 13:46:24 +0000 Subject: [PATCH 4/5] [SPARK-31102][SQL] Spark-sql fails to parse when contains comment This PR introduces a change to false for the insideComment flag on a newline. Fixing the issue introduced by SPARK-30049. Previously on SPARK-30049 a comment containing an unclosed quote produced the following issue: ``` spark-sql> SELECT 1 -- someone's comment here > ; Error in query: extraneous input ';' expecting (line 2, pos 0) == SQL == SELECT 1 -- someone's comment here ; ^^^ ``` This was caused because there was no flag for comment sections inside the splitSemiColon method to ignore quotes. SPARK-30049 added that flag and fixed the issue, but introduced the follwoing problem: ``` spark-sql> select > 1, > -- two > 2; Error in query: mismatched input '' expecting {'(', 'ADD', 'AFTER', 'ALL', 'ALTER', ...}(line 3, pos 2) == SQL == select 1, --^^^ ``` This issue is generated by a missing turn-off for the insideComment flag with a newline. No - For previous tests using line-continuity(`\`) it was added a line-continuity rule in the SqlBase.g4 file to add the functionality to the SQL context. - A new test for inline comments was added. Closes #27920 from javierivanov/SPARK-31102. Authored-by: Javier Fuentes Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/parser/SqlBase.g4 | 2 +- .../sql/catalyst/parser/PlanParserSuite.scala | 10 ++++++++++ .../hive/thriftserver/SparkSQLCLIDriver.scala | 12 +++++------ .../sql/hive/thriftserver/CliSuite.scala | 20 ++++++++++--------- 4 files changed, 28 insertions(+), 16 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 0792a7b7eff54..82e4bc2c2ba77 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -1075,7 +1075,7 @@ fragment LETTER ; SIMPLE_COMMENT - : '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN) + : '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN) ; BRACKETED_EMPTY_COMMENT diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index da69c123c8703..b744a7fb2dc27 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -46,6 +46,16 @@ class PlanParserSuite extends AnalysisTest { } } + test("single comment case one") { + val plan = table("a").select(star()) + assertEqual("-- single comment\nSELECT * FROM a", plan) + } + + test("single comment case two") { + val plan = table("a").select(star()) + assertEqual("-- single comment\\\nwith line continuity\nSELECT * FROM a", plan) + } + test("case insensitive") { val plan = table("a").select(star()) assertEqual("sELEct * FroM a", plan) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index fdc4e2482f73c..095ce32493dfb 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -511,7 +511,6 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { var insideComment = false var escape = false var beginIndex = 0 - var endIndex = line.length val ret = new JArrayList[String] for (index <- 0 until line.length) { @@ -539,8 +538,6 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { } else if (hasNext && line.charAt(index + 1) == '-') { // ignore quotes and ; insideComment = true - // ignore eol - endIndex = index } } else if (line.charAt(index) == ';') { if (insideSingleQuote || insideDoubleQuote || insideComment) { @@ -550,8 +547,11 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { ret.add(line.substring(beginIndex, index)) beginIndex = index + 1 } - } else { - // nothing to do + } else if (line.charAt(index) == '\n') { + // with a new line the inline comment should end. + if (!escape) { + insideComment = false + } } // set the escape if (escape) { @@ -560,7 +560,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { escape = true } } - ret.add(line.substring(beginIndex, endIndex)) + ret.add(line.substring(beginIndex)) ret } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 06f9fda292fe5..45cdcb608109a 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -313,18 +313,20 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { ) } - test("SPARK-30049 Should not complain for quotes in commented with multi-lines") { + test("SPARK-31102 spark-sql fails to parse when contains comment") { runCliWithin(1.minute)( - """SELECT concat('test', 'comment') -- someone's comment here \\ - | comment continues here with single ' quote \\ - | extra ' \\ - |;""".stripMargin -> "testcomment" + """SELECT concat('test', 'comment'), + | -- someone's comment here + | 2;""".stripMargin -> "testcomment" ) + } + + test("SPARK-30049 Should not complain for quotes in commented with multi-lines") { runCliWithin(1.minute)( - """SELECT concat('test', 'comment') -- someone's comment here \\ - | comment continues here with single ' quote \\ - | extra ' \\ - | ;""".stripMargin -> "testcomment" + """SELECT concat('test', 'comment') -- someone's comment here \ + | comment continues here with single ' quote \ + | extra ' \ + |;""".stripMargin -> "testcomment" ) } From 851a9899af38656d8bfbf5447685cb23d7f61ce4 Mon Sep 17 00:00:00 2001 From: fwang12 Date: Tue, 5 Jan 2021 15:55:30 +0900 Subject: [PATCH 5/5] [SPARK-33100][SQL] Ignore a semicolon inside a bracketed comment in spark-sql Now the spark-sql does not support parse the sql statements with bracketed comments. For the sql statements: ``` /* SELECT 'test'; */ SELECT 'test'; ``` Would be split to two statements: The first one: `/* SELECT 'test'` The second one: `*/ SELECT 'test'` Then it would throw an exception because the first one is illegal. In this PR, we ignore the content in bracketed comments while splitting the sql statements. Besides, we ignore the comment without any content. Spark-sql might split the statements inside bracketed comments and it is not correct. No. Added UT. Closes #29982 from turboFei/SPARK-33110. Lead-authored-by: fwang12 Co-authored-by: turbofei Signed-off-by: Takeshi Yamamuro --- .../hive/thriftserver/SparkSQLCLIDriver.scala | 40 +++++++++++++++---- .../sql/hive/thriftserver/CliSuite.scala | 21 ++++++++++ 2 files changed, 53 insertions(+), 8 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 095ce32493dfb..8c749248e863b 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -505,14 +505,22 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { // Note: [SPARK-31595] if there is a `'` in a double quoted string, or a `"` in a single quoted // string, the origin implementation from Hive will not drop the trailing semicolon as expected, // hence we refined this function a little bit. + // Note: [SPARK-33100] Ignore a semicolon inside a bracketed comment in spark-sql. private def splitSemiColon(line: String): JList[String] = { var insideSingleQuote = false var insideDoubleQuote = false - var insideComment = false + var insideSimpleComment = false + var bracketedCommentLevel = 0 var escape = false var beginIndex = 0 + var includingStatement = false val ret = new JArrayList[String] + def insideBracketedComment: Boolean = bracketedCommentLevel > 0 + def insideComment: Boolean = insideSimpleComment || insideBracketedComment + def statementBegin(index: Int): Boolean = includingStatement || (!insideComment && + index > beginIndex && !s"${line.charAt(index)}".trim.isEmpty) + for (index <- 0 until line.length) { if (line.charAt(index) == '\'' && !insideComment) { // take a look to see if it is escaped @@ -536,21 +544,33 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { // Sample query: select "quoted value --" // ^^ avoids starting a comment if it's inside quotes. } else if (hasNext && line.charAt(index + 1) == '-') { - // ignore quotes and ; - insideComment = true + // ignore quotes and ; in simple comment + insideSimpleComment = true } } else if (line.charAt(index) == ';') { if (insideSingleQuote || insideDoubleQuote || insideComment) { // do not split } else { - // split, do not include ; itself - ret.add(line.substring(beginIndex, index)) + if (includingStatement) { + // split, do not include ; itself + ret.add(line.substring(beginIndex, index)) + } beginIndex = index + 1 + includingStatement = false } } else if (line.charAt(index) == '\n') { - // with a new line the inline comment should end. + // with a new line the inline simple comment should end. if (!escape) { - insideComment = false + insideSimpleComment = false + } + } else if (line.charAt(index) == '/' && !insideSimpleComment) { + val hasNext = index + 1 < line.length + if (insideSingleQuote || insideDoubleQuote) { + // Ignores '/' in any case of quotes + } else if (insideBracketedComment && line.charAt(index - 1) == '*' ) { + bracketedCommentLevel -= 1 + } else if (hasNext && !insideBracketedComment && line.charAt(index + 1) == '*') { + bracketedCommentLevel += 1 } } // set the escape @@ -559,8 +579,12 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { } else if (line.charAt(index) == '\\') { escape = true } + + includingStatement = statementBegin(index) + } + if (includingStatement) { + ret.add(line.substring(beginIndex)) } - ret.add(line.substring(beginIndex)) ret } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 45cdcb608109a..6402d3dd32609 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -338,4 +338,25 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { "SELECT \"legal 'string b\";select 22222 + 1;".stripMargin -> "22223" ) } + + test("SPARK-33100: Ignore a semicolon inside a bracketed comment in spark-sql") { + runCliWithin(4.minute)( + "/* SELECT 'test';*/ SELECT 'test';" -> "test", + ";;/* SELECT 'test';*/ SELECT 'test';" -> "test", + "/* SELECT 'test';*/;; SELECT 'test';" -> "test", + "SELECT 'test'; -- SELECT 'test';" -> "", + "SELECT 'test'; /* SELECT 'test';*/;" -> "", + "/*$meta chars{^\\;}*/ SELECT 'test';" -> "test", + "/*\nmulti-line\n*/ SELECT 'test';" -> "test", + "/*/* multi-level bracketed*/ SELECT 'test';" -> "test" + ) + } + + test("SPARK-33100: test sql statements with hint in bracketed comment") { + runCliWithin(2.minute)( + "CREATE TEMPORARY VIEW t AS SELECT * FROM VALUES(1, 2) AS t(k, v);" -> "", + "EXPLAIN EXTENDED SELECT /*+ broadcast(t) */ * from t;" + -> "ResolvedHint (strategy=broadcast)" + ) + } }