diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index 186597fa64780..7205293aa48c5 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -341,8 +341,17 @@ public UTF8String substringSQL(int pos, int length) { // to the -ith element before the end of the sequence. If a start index i is 0, it // refers to the first element. int len = numChars(); + // `len + pos` does not overflow as `len >= 0`. int start = (pos > 0) ? pos -1 : ((pos < 0) ? len + pos : 0); - int end = (length == Integer.MAX_VALUE) ? len : start + length; + + int end; + if ((long) start + length > Integer.MAX_VALUE) { + end = Integer.MAX_VALUE; + } else if ((long) start + length < Integer.MIN_VALUE) { + end = Integer.MIN_VALUE; + } else { + end = start + length; + } return substring(start, end); } diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java index 8f933877f82e6..70e276f7e5a8b 100644 --- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java @@ -390,6 +390,10 @@ public void substringSQL() { assertEquals(fromString("example"), e.substringSQL(0, Integer.MAX_VALUE)); assertEquals(fromString("example"), e.substringSQL(1, Integer.MAX_VALUE)); assertEquals(fromString("xample"), e.substringSQL(2, Integer.MAX_VALUE)); + assertEquals(EMPTY_UTF8, e.substringSQL(-100, -100)); + assertEquals(EMPTY_UTF8, e.substringSQL(-1207959552, -1207959552)); + assertEquals(fromString("pl"), e.substringSQL(-3, 2)); + assertEquals(EMPTY_UTF8, e.substringSQL(Integer.MIN_VALUE, 6)); } @Test diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index f18364d844ce1..967ccc42c632d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -236,6 +236,10 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { "xample", row) + // Substring with from negative position with negative length + checkEvaluation(Substring(s, Literal.create(-1207959552, IntegerType), + Literal.create(-1207959552, IntegerType)), "", row) + val s_notNull = 'a.string.notNull.at(0) assert(Substring(s, Literal.create(0, IntegerType), Literal.create(2, IntegerType)).nullable)