From a6b529bb4f9b5e31112bae991df3410a05c55d58 Mon Sep 17 00:00:00 2001
From: Yuming Wang <yumwang@ebay.com>
Date: Sat, 12 Oct 2019 22:21:14 -0700
Subject: [PATCH 1/5] [SPARK-26321][SQL] Port HIVE-15297: Hive should not split
 semicolon within quoted string literals

This pr port [HIVE-15297](https://issues.apache.org/jira/browse/HIVE-15297) to fix **spark-sql** should not split semicolon within quoted string literals.

unit tests and manual tests:
![image](https://user-images.githubusercontent.com/5399861/60395592-5666ea00-9b68-11e9-99dc-0e8ea98de32b.png)

Closes #25018 from wangyum/SPARK-26321.

Authored-by: Yuming Wang <yumwang@ebay.com>
Signed-off-by: Yuming Wang <wgyumg@gmail.com>
---
 .../hive/thriftserver/SparkSQLCLIDriver.scala | 110 +++++++++++++++++-
 .../sql/hive/thriftserver/CliSuite.scala      |   9 ++
 2 files changed, 118 insertions(+), 1 deletion(-)

diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
index bb96cea2b0ae1..0c133252d82aa 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.sql.hive.thriftserver
 
 import java.io._
-import java.util.{ArrayList => JArrayList, Locale}
+import java.util.{ArrayList => JArrayList, List => JList, Locale}
 
 import scala.collection.JavaConverters._
 
@@ -37,6 +37,7 @@ import org.apache.hadoop.hive.ql.session.SessionState
 import org.apache.hadoop.security.{Credentials, UserGroupInformation}
 import org.apache.log4j.Level
 import org.apache.thrift.transport.TSocket
+import sun.misc.{Signal, SignalHandler}
 
 import org.apache.spark.SparkConf
 import org.apache.spark.deploy.SparkHadoopUtil
@@ -434,5 +435,112 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
       ret
     }
   }
+
+  // Adapted processLine from Hive 2.3's CliDriver.processLine.
+  override def processLine(line: String, allowInterrupting: Boolean): Int = {
+    var oldSignal: SignalHandler = null
+    var interruptSignal: Signal = null
+
+    if (allowInterrupting) {
+      // Remember all threads that were running at the time we started line processing.
+      // Hook up the custom Ctrl+C handler while processing this line
+      interruptSignal = new Signal("INT")
+      oldSignal = Signal.handle(interruptSignal, new SignalHandler() {
+        private var interruptRequested: Boolean = false
+
+        override def handle(signal: Signal) {
+          val initialRequest = !interruptRequested
+          interruptRequested = true
+
+          // Kill the VM on second ctrl+c
+          if (!initialRequest) {
+            console.printInfo("Exiting the JVM")
+            System.exit(127)
+          }
+
+          // Interrupt the CLI thread to stop the current statement and return
+          // to prompt
+          console.printInfo("Interrupting... Be patient, this might take some time.")
+          console.printInfo("Press Ctrl+C again to kill JVM")
+
+          HiveInterruptUtils.interrupt()
+        }
+      })
+    }
+
+    try {
+      var lastRet: Int = 0
+
+      // we can not use "split" function directly as ";" may be quoted
+      val commands = splitSemiColon(line).asScala
+      var command: String = ""
+      for (oneCmd <- commands) {
+        if (StringUtils.endsWith(oneCmd, "\\")) {
+          command += StringUtils.chop(oneCmd) + ";"
+        } else {
+          command += oneCmd
+          if (!StringUtils.isBlank(command)) {
+            val ret = processCmd(command)
+            command = ""
+            lastRet = ret
+            val ignoreErrors = HiveConf.getBoolVar(conf, HiveConf.ConfVars.CLIIGNOREERRORS)
+            if (ret != 0 && !ignoreErrors) {
+              CommandProcessorFactory.clean(conf.asInstanceOf[HiveConf])
+              ret
+            }
+          }
+        }
+      }
+      CommandProcessorFactory.clean(conf.asInstanceOf[HiveConf])
+      lastRet
+    } finally {
+      // Once we are done processing the line, restore the old handler
+      if (oldSignal != null && interruptSignal != null) {
+        Signal.handle(interruptSignal, oldSignal)
+      }
+    }
+  }
+
+  // Adapted splitSemiColon from Hive 2.3's CliDriver.splitSemiColon.
+  private def splitSemiColon(line: String): JList[String] = {
+    var insideSingleQuote = false
+    var insideDoubleQuote = false
+    var escape = false
+    var beginIndex = 0
+    val ret = new JArrayList[String]
+    for (index <- 0 until line.length) {
+      if (line.charAt(index) == '\'') {
+        // take a look to see if it is escaped
+        if (!escape) {
+          // flip the boolean variable
+          insideSingleQuote = !insideSingleQuote
+        }
+      } else if (line.charAt(index) == '\"') {
+        // take a look to see if it is escaped
+        if (!escape) {
+          // flip the boolean variable
+          insideDoubleQuote = !insideDoubleQuote
+        }
+      } else if (line.charAt(index) == ';') {
+        if (insideSingleQuote || insideDoubleQuote) {
+          // do not split
+        } else {
+          // split, do not include ; itself
+          ret.add(line.substring(beginIndex, index))
+          beginIndex = index + 1
+        }
+      } else {
+        // nothing to do
+      }
+      // set the escape
+      if (escape) {
+        escape = false
+      } else if (line.charAt(index) == '\\') {
+        escape = true
+      }
+    }
+    ret.add(line.substring(beginIndex))
+    ret
+  }
 }
 
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index 933fd7369380a..f35e45319daa7 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -296,4 +296,13 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
       "set spark.sql.warehouse.dir;" -> tmpDir.getAbsolutePath)
     tmpDir.delete()
   }
+
+  test("SPARK-26321 Should not split semicolon within quoted string literals") {
+    runCliWithin(3.minute)(
+      """select 'Test1', "^;^";""" -> "Test1\t^;^",
+      """select 'Test2', "\";";""" -> "Test2\t\";",
+      """select 'Test3', "\';";""" -> "Test3\t';",
+      "select concat('Test4', ';');" -> "Test4;"
+    )
+  }
 }

From 7cbdeb7446e102c550e2ae71c6d506d367b72666 Mon Sep 17 00:00:00 2001
From: Javier <jfuentes@hortonworks.com>
Date: Tue, 3 Mar 2020 09:55:15 -0600
Subject: [PATCH 2/5] [SPARK-30049][SQL] SQL fails to parse when comment
 contains an unmatched quote character

A SQL statement that contains a comment with an unmatched quote character can lead to a parse error:
- Added a insideComment flag in the splitter method to avoid checking single and double quotes within a comment:
```
spark-sql> SELECT 1 -- someone's comment here
         > ;
Error in query:
extraneous input ';' expecting <EOF>(line 2, pos 0)

== SQL ==
SELECT 1 -- someone's comment here
;
^^^
```

This misbehaviour was not present on previous spark versions.

- No

- New tests were added.

Closes #27321 from javierivanov/SPARK-30049B.

Lead-authored-by: Javier <jfuentes@hortonworks.com>
Co-authored-by: Javier Fuentes <j.fuentes.m@icloud.com>
Signed-off-by: Thomas Graves <tgraves@apache.org>
---
 .../hive/thriftserver/SparkSQLCLIDriver.scala | 24 +++++++++++++++----
 .../sql/hive/thriftserver/CliSuite.scala      | 22 +++++++++++++++++
 2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
index 0c133252d82aa..44227ed0871e3 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
@@ -505,24 +505,40 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
   private def splitSemiColon(line: String): JList[String] = {
     var insideSingleQuote = false
     var insideDoubleQuote = false
+    var insideComment = false
     var escape = false
     var beginIndex = 0
+    var endIndex = line.length
     val ret = new JArrayList[String]
+
     for (index <- 0 until line.length) {
-      if (line.charAt(index) == '\'') {
+      if (line.charAt(index) == '\'' && !insideComment) {
         // take a look to see if it is escaped
         if (!escape) {
           // flip the boolean variable
           insideSingleQuote = !insideSingleQuote
         }
-      } else if (line.charAt(index) == '\"') {
+      } else if (line.charAt(index) == '\"' && !insideComment) {
         // take a look to see if it is escaped
         if (!escape) {
           // flip the boolean variable
           insideDoubleQuote = !insideDoubleQuote
         }
+      } else if (line.charAt(index) == '-') {
+        val hasNext = index + 1 < line.length
+        if (insideDoubleQuote || insideSingleQuote || insideComment) {
+          // Ignores '-' in any case of quotes or comment.
+          // Avoids to start a comment(--) within a quoted segment or already in a comment.
+          // Sample query: select "quoted value --"
+          //                                    ^^ avoids starting a comment if it's inside quotes.
+        } else if (hasNext && line.charAt(index + 1) == '-') {
+          // ignore quotes and ;
+          insideComment = true
+          // ignore eol
+          endIndex = index
+        }
       } else if (line.charAt(index) == ';') {
-        if (insideSingleQuote || insideDoubleQuote) {
+        if (insideSingleQuote || insideDoubleQuote || insideComment) {
           // do not split
         } else {
           // split, do not include ; itself
@@ -539,7 +555,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
         escape = true
       }
     }
-    ret.add(line.substring(beginIndex))
+    ret.add(line.substring(beginIndex, endIndex))
     ret
   }
 }
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index f35e45319daa7..5ae1a894af24f 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -305,4 +305,26 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
       "select concat('Test4', ';');" -> "Test4;"
     )
   }
+
+  test("SPARK-30049 Should not complain for quotes in commented lines") {
+    runCliWithin(1.minute)(
+      """SELECT concat('test', 'comment') -- someone's comment here
+        |;""".stripMargin -> "testcomment"
+    )
+  }
+
+  test("SPARK-30049 Should not complain for quotes in commented with multi-lines") {
+    runCliWithin(1.minute)(
+      """SELECT concat('test', 'comment') -- someone's comment here \\
+        | comment continues here with single ' quote \\
+        | extra ' \\
+        |;""".stripMargin -> "testcomment"
+    )
+    runCliWithin(1.minute)(
+      """SELECT concat('test', 'comment') -- someone's comment here \\
+        |   comment continues here with single ' quote \\
+        |   extra ' \\
+        |   ;""".stripMargin -> "testcomment"
+    )
+  }
 }

From bda05e86b3f51df913d8b601319446186ae5605e Mon Sep 17 00:00:00 2001
From: Daoyuan Wang <me@daoyuan.wang>
Date: Wed, 6 May 2020 04:34:43 +0000
Subject: [PATCH 3/5] [SPARK-31595][SQL] Spark sql should allow unescaped quote
 mark in quoted string

### What changes were proposed in this pull request?
`def splitSemiColon` cannot handle unescaped quote mark like "'" or '"' correctly. When there are unmatched quotes in a string, `splitSemiColon` will not drop off semicolon as expected.

### Why are the changes needed?
Some regex expression will use quote mark in string. We should process semicolon correctly.

### Does this PR introduce any user-facing change?
No

### How was this patch tested?
Added Unit test and also manual test.

Closes #28393 from adrian-wang/unescaped.

Authored-by: Daoyuan Wang <me@daoyuan.wang>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
---
 .../spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala  | 9 +++++++--
 .../apache/spark/sql/hive/thriftserver/CliSuite.scala    | 9 +++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
index 44227ed0871e3..fdc4e2482f73c 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
@@ -502,6 +502,9 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
   }
 
   // Adapted splitSemiColon from Hive 2.3's CliDriver.splitSemiColon.
+  // Note: [SPARK-31595] if there is a `'` in a double quoted string, or a `"` in a single quoted
+  // string, the origin implementation from Hive will not drop the trailing semicolon as expected,
+  // hence we refined this function a little bit.
   private def splitSemiColon(line: String): JList[String] = {
     var insideSingleQuote = false
     var insideDoubleQuote = false
@@ -514,13 +517,15 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
     for (index <- 0 until line.length) {
       if (line.charAt(index) == '\'' && !insideComment) {
         // take a look to see if it is escaped
-        if (!escape) {
+        // See the comment above about SPARK-31595
+        if (!escape && !insideDoubleQuote) {
           // flip the boolean variable
           insideSingleQuote = !insideSingleQuote
         }
       } else if (line.charAt(index) == '\"' && !insideComment) {
         // take a look to see if it is escaped
-        if (!escape) {
+        // See the comment above about SPARK-31595
+        if (!escape && !insideSingleQuote) {
           // flip the boolean variable
           insideDoubleQuote = !insideDoubleQuote
         }
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index 5ae1a894af24f..06f9fda292fe5 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -327,4 +327,13 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
         |   ;""".stripMargin -> "testcomment"
     )
   }
+
+  test("SPARK-31595 Should allow unescaped quote mark in quoted string") {
+    runCliWithin(1.minute)(
+      "SELECT '\"legal string a';select 1 + 234;".stripMargin -> "235"
+    )
+    runCliWithin(1.minute)(
+      "SELECT \"legal 'string b\";select 22222 + 1;".stripMargin -> "22223"
+    )
+  }
 }

From c55c21bb01b7735c8b1e5235cb36270ee74cb6e7 Mon Sep 17 00:00:00 2001
From: Javier Fuentes <j.fuentes.m@icloud.com>
Date: Tue, 12 May 2020 13:46:24 +0000
Subject: [PATCH 4/5] [SPARK-31102][SQL] Spark-sql fails to parse when contains
 comment

This PR introduces a change to false for the insideComment flag on a newline. Fixing the issue introduced by SPARK-30049.

Previously on SPARK-30049 a comment containing an unclosed quote produced the following issue:
```
spark-sql> SELECT 1 -- someone's comment here
         > ;
Error in query:
extraneous input ';' expecting <EOF>(line 2, pos 0)

== SQL ==
SELECT 1 -- someone's comment here
;
^^^
```

This was caused because there was no flag for comment sections inside the splitSemiColon method to ignore quotes. SPARK-30049 added that flag and fixed the issue, but introduced the follwoing problem:
```
spark-sql> select
         >   1,
         >   -- two
         >   2;
Error in query:
mismatched input '<EOF>' expecting {'(', 'ADD', 'AFTER', 'ALL', 'ALTER', ...}(line 3, pos 2)
== SQL ==
select
  1,
--^^^
```
This issue is generated by a missing turn-off for the insideComment flag with a newline.

No

- For previous tests using line-continuity(`\`) it was added a line-continuity rule in the SqlBase.g4 file to add the functionality to the SQL context.
- A new test for inline comments was added.

Closes #27920 from javierivanov/SPARK-31102.

Authored-by: Javier Fuentes <j.fuentes.m@icloud.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
---
 .../spark/sql/catalyst/parser/SqlBase.g4      |  2 +-
 .../sql/catalyst/parser/PlanParserSuite.scala | 10 ++++++++++
 .../hive/thriftserver/SparkSQLCLIDriver.scala | 12 +++++------
 .../sql/hive/thriftserver/CliSuite.scala      | 20 ++++++++++---------
 4 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 0792a7b7eff54..82e4bc2c2ba77 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -1075,7 +1075,7 @@ fragment LETTER
     ;
 
 SIMPLE_COMMENT
-    : '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN)
+    : '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN)
     ;
 
 BRACKETED_EMPTY_COMMENT
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index da69c123c8703..b744a7fb2dc27 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -46,6 +46,16 @@ class PlanParserSuite extends AnalysisTest {
     }
   }
 
+  test("single comment case one") {
+    val plan = table("a").select(star())
+    assertEqual("-- single comment\nSELECT * FROM a", plan)
+  }
+
+  test("single comment case two") {
+    val plan = table("a").select(star())
+    assertEqual("-- single comment\\\nwith line continuity\nSELECT * FROM a", plan)
+  }
+
   test("case insensitive") {
     val plan = table("a").select(star())
     assertEqual("sELEct * FroM a", plan)
diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
index fdc4e2482f73c..095ce32493dfb 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
@@ -511,7 +511,6 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
     var insideComment = false
     var escape = false
     var beginIndex = 0
-    var endIndex = line.length
     val ret = new JArrayList[String]
 
     for (index <- 0 until line.length) {
@@ -539,8 +538,6 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
         } else if (hasNext && line.charAt(index + 1) == '-') {
           // ignore quotes and ;
           insideComment = true
-          // ignore eol
-          endIndex = index
         }
       } else if (line.charAt(index) == ';') {
         if (insideSingleQuote || insideDoubleQuote || insideComment) {
@@ -550,8 +547,11 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
           ret.add(line.substring(beginIndex, index))
           beginIndex = index + 1
         }
-      } else {
-        // nothing to do
+      } else if (line.charAt(index) == '\n') {
+        // with a new line the inline comment should end.
+        if (!escape) {
+          insideComment = false
+        }
       }
       // set the escape
       if (escape) {
@@ -560,7 +560,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
         escape = true
       }
     }
-    ret.add(line.substring(beginIndex, endIndex))
+    ret.add(line.substring(beginIndex))
     ret
   }
 }
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index 06f9fda292fe5..45cdcb608109a 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -313,18 +313,20 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
     )
   }
 
-  test("SPARK-30049 Should not complain for quotes in commented with multi-lines") {
+  test("SPARK-31102 spark-sql fails to parse when contains comment") {
     runCliWithin(1.minute)(
-      """SELECT concat('test', 'comment') -- someone's comment here \\
-        | comment continues here with single ' quote \\
-        | extra ' \\
-        |;""".stripMargin -> "testcomment"
+      """SELECT concat('test', 'comment'),
+        |    -- someone's comment here
+        | 2;""".stripMargin -> "testcomment"
     )
+  }
+
+  test("SPARK-30049 Should not complain for quotes in commented with multi-lines") {
     runCliWithin(1.minute)(
-      """SELECT concat('test', 'comment') -- someone's comment here \\
-        |   comment continues here with single ' quote \\
-        |   extra ' \\
-        |   ;""".stripMargin -> "testcomment"
+      """SELECT concat('test', 'comment') -- someone's comment here \
+        | comment continues here with single ' quote \
+        | extra ' \
+        |;""".stripMargin -> "testcomment"
     )
   }
 

From 851a9899af38656d8bfbf5447685cb23d7f61ce4 Mon Sep 17 00:00:00 2001
From: fwang12 <fwang12@ebay.com>
Date: Tue, 5 Jan 2021 15:55:30 +0900
Subject: [PATCH 5/5] [SPARK-33100][SQL] Ignore a semicolon inside a bracketed
 comment in spark-sql

Now the spark-sql does not support parse the sql statements with bracketed comments.
For the sql statements:
```
/* SELECT 'test'; */
SELECT 'test';
```
Would be split to two statements:
The first one: `/* SELECT 'test'`
The second one: `*/ SELECT 'test'`

Then it would throw an exception because the first one is illegal.
In this PR, we ignore the content in bracketed comments while splitting the sql statements.
Besides, we ignore the comment without any content.

Spark-sql might split the statements inside bracketed comments and it is not correct.

No.

Added UT.

Closes #29982 from turboFei/SPARK-33110.

Lead-authored-by: fwang12 <fwang12@ebay.com>
Co-authored-by: turbofei <fwang12@ebay.com>
Signed-off-by: Takeshi Yamamuro <yamamuro@apache.org>
---
 .../hive/thriftserver/SparkSQLCLIDriver.scala | 40 +++++++++++++++----
 .../sql/hive/thriftserver/CliSuite.scala      | 21 ++++++++++
 2 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
index 095ce32493dfb..8c749248e863b 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
@@ -505,14 +505,22 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
   // Note: [SPARK-31595] if there is a `'` in a double quoted string, or a `"` in a single quoted
   // string, the origin implementation from Hive will not drop the trailing semicolon as expected,
   // hence we refined this function a little bit.
+  // Note: [SPARK-33100] Ignore a semicolon inside a bracketed comment in spark-sql.
   private def splitSemiColon(line: String): JList[String] = {
     var insideSingleQuote = false
     var insideDoubleQuote = false
-    var insideComment = false
+    var insideSimpleComment = false
+    var bracketedCommentLevel = 0
     var escape = false
     var beginIndex = 0
+    var includingStatement = false
     val ret = new JArrayList[String]
 
+    def insideBracketedComment: Boolean = bracketedCommentLevel > 0
+    def insideComment: Boolean = insideSimpleComment || insideBracketedComment
+    def statementBegin(index: Int): Boolean = includingStatement || (!insideComment &&
+      index > beginIndex && !s"${line.charAt(index)}".trim.isEmpty)
+
     for (index <- 0 until line.length) {
       if (line.charAt(index) == '\'' && !insideComment) {
         // take a look to see if it is escaped
@@ -536,21 +544,33 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
           // Sample query: select "quoted value --"
           //                                    ^^ avoids starting a comment if it's inside quotes.
         } else if (hasNext && line.charAt(index + 1) == '-') {
-          // ignore quotes and ;
-          insideComment = true
+          // ignore quotes and ; in simple comment
+          insideSimpleComment = true
         }
       } else if (line.charAt(index) == ';') {
         if (insideSingleQuote || insideDoubleQuote || insideComment) {
           // do not split
         } else {
-          // split, do not include ; itself
-          ret.add(line.substring(beginIndex, index))
+          if (includingStatement) {
+            // split, do not include ; itself
+            ret.add(line.substring(beginIndex, index))
+          }
           beginIndex = index + 1
+          includingStatement = false
         }
       } else if (line.charAt(index) == '\n') {
-        // with a new line the inline comment should end.
+        // with a new line the inline simple comment should end.
         if (!escape) {
-          insideComment = false
+          insideSimpleComment = false
+        }
+      } else if (line.charAt(index) == '/' && !insideSimpleComment) {
+        val hasNext = index + 1 < line.length
+        if (insideSingleQuote || insideDoubleQuote) {
+          // Ignores '/' in any case of quotes
+        } else if (insideBracketedComment && line.charAt(index - 1) == '*' ) {
+          bracketedCommentLevel -= 1
+        } else if (hasNext && !insideBracketedComment && line.charAt(index + 1) == '*') {
+          bracketedCommentLevel += 1
         }
       }
       // set the escape
@@ -559,8 +579,12 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
       } else if (line.charAt(index) == '\\') {
         escape = true
       }
+
+      includingStatement = statementBegin(index)
+    }
+    if (includingStatement) {
+      ret.add(line.substring(beginIndex))
     }
-    ret.add(line.substring(beginIndex))
     ret
   }
 }
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index 45cdcb608109a..6402d3dd32609 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -338,4 +338,25 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
       "SELECT \"legal 'string b\";select 22222 + 1;".stripMargin -> "22223"
     )
   }
+
+  test("SPARK-33100: Ignore a semicolon inside a bracketed comment in spark-sql") {
+    runCliWithin(4.minute)(
+      "/* SELECT 'test';*/ SELECT 'test';" -> "test",
+      ";;/* SELECT 'test';*/ SELECT 'test';" -> "test",
+      "/* SELECT 'test';*/;; SELECT 'test';" -> "test",
+      "SELECT 'test'; -- SELECT 'test';" -> "",
+      "SELECT 'test'; /* SELECT 'test';*/;" -> "",
+      "/*$meta chars{^\\;}*/ SELECT 'test';" -> "test",
+      "/*\nmulti-line\n*/ SELECT 'test';" -> "test",
+      "/*/* multi-level bracketed*/ SELECT 'test';" -> "test"
+    )
+  }
+
+  test("SPARK-33100: test sql statements with hint in bracketed comment") {
+    runCliWithin(2.minute)(
+      "CREATE TEMPORARY VIEW t AS SELECT * FROM VALUES(1, 2) AS t(k, v);" -> "",
+      "EXPLAIN EXTENDED SELECT /*+ broadcast(t) */ * from t;"
+        -> "ResolvedHint (strategy=broadcast)"
+    )
+  }
 }