From ffe39e4a1c31f606caeed3f3e193e9945b7e8923 Mon Sep 17 00:00:00 2001
From: Marcelo Vanzin <vanzin@cloudera.com>
Date: Fri, 4 Sep 2015 11:30:50 -0700
Subject: [PATCH 1/8] [SPARK-10439] [sql] Add bound checks to DateTimeUtils.

There were a couple of places where Spark SQL would silently truncate
data if certain timestamps were provided.

In a couple of other places, the way to calculate Julian day-based
timestamps was changed a little so that Spark writes data that is
friendlier to Hive; mostly, Hive does not like very much when
the data has negative values for either the days or nanos part,
so avoid those.

The values that trigger these use cases are very uncommon (very large
values in either end of the spectrum), so this shouldn't really affect
any existing applications.
---
 .../sql/catalyst/util/DateTimeUtils.scala     | 25 ++++++++++++---
 .../spark/sql/RandomDataGenerator.scala       |  6 +++-
 .../expressions/LiteralGenerator.scala        |  5 ++-
 .../catalyst/util/DateTimeUtilsSuite.scala    | 31 +++++++++++++++++++
 4 files changed, 61 insertions(+), 6 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index d652fce3fd9b6..705eb063386ed 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -17,6 +17,7 @@
 
 package org.apache.spark.sql.catalyst.util
 
+import java.lang.{Long => JLong}
 import java.sql.{Date, Timestamp}
 import java.text.{DateFormat, SimpleDateFormat}
 import java.util.{TimeZone, Calendar}
@@ -56,6 +57,12 @@ object DateTimeUtils {
 
   @transient lazy val defaultTimeZone = TimeZone.getDefault
 
+  // Constants defining the allowed ranges for timestampts that fit in parquet files. Mostly
+  // used by tests to avoid duplication.
+  private[spark] final val MIN_TIMESTAMP: Long =
+    -(DateTimeUtils.JULIAN_DAY_OF_EPOCH * DateTimeUtils.SECONDS_PER_DAY * 1000)
+  private[spark] final val MAX_TIMESTAMP: Long = java.lang.Long.MAX_VALUE / 1000
+
   // Java TimeZone has no mention of thread safety. Use thread local instance to be safe.
   private val threadLocalLocalTimeZone = new ThreadLocal[TimeZone] {
     override protected def initialValue: TimeZone = {
@@ -82,7 +89,9 @@ object DateTimeUtils {
     // SPARK-6785: use Math.floor so negative number of days (dates before 1970)
     // will correctly work as input for function toJavaDate(Int)
     val millisLocal = millisUtc + threadLocalLocalTimeZone.get().getOffset(millisUtc)
-    Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt
+    val days = Math.floor(millisLocal.toDouble / MILLIS_PER_DAY)
+    require(days <= Integer.MAX_VALUE && days >= Integer.MIN_VALUE, "Date exceeeds allowed range.")
+    days.toInt
   }
 
   // reverse of millisToDays
@@ -172,6 +181,8 @@ object DateTimeUtils {
    */
   def fromJavaTimestamp(t: Timestamp): SQLTimestamp = {
     if (t != null) {
+      require(t.getTime() <= JLong.MAX_VALUE / 1000 && t.getTime() >= JLong.MIN_VALUE / 1000,
+        s"Timestamp exceeds allowed range (${t.getTime()}).")
       t.getTime() * 1000L + (t.getNanos().toLong / 1000) % 1000L
     } else {
       0L
@@ -193,9 +204,15 @@ object DateTimeUtils {
    */
   def toJulianDay(us: SQLTimestamp): (Int, Long) = {
     val seconds = us / MICROS_PER_SECOND
-    val day = seconds / SECONDS_PER_DAY + JULIAN_DAY_OF_EPOCH
-    val secondsInDay = seconds % SECONDS_PER_DAY
-    val nanos = (us % MICROS_PER_SECOND) * 1000L
+    var day = seconds / SECONDS_PER_DAY + JULIAN_DAY_OF_EPOCH
+    var secondsInDay = seconds % SECONDS_PER_DAY
+    var nanos = (us % MICROS_PER_SECOND) * 1000L
+    if (us < 0) {
+      day -= 1
+      secondsInDay += SECONDS_PER_DAY
+      nanos += (SECONDS_PER_DAY * MICROS_PER_SECOND * 1000L)
+      require(day >= 0, "Timestamp exceeds allowed range.")
+    }
     (day.toInt, secondsInDay * NANOS_PER_SECOND + nanos)
   }
 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
index 11e0c120f4072..a6a7bd038ad1e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
@@ -24,6 +24,7 @@ import java.math.MathContext
 import scala.util.Random
 
 import org.apache.spark.sql.types._
+import org.apache.spark.sql.catalyst.util.DateTimeUtils
 import org.apache.spark.unsafe.types.CalendarInterval
 
 /**
@@ -106,7 +107,10 @@ object RandomDataGenerator {
       })
       case BooleanType => Some(() => rand.nextBoolean())
       case DateType => Some(() => new java.sql.Date(rand.nextInt()))
-      case TimestampType => Some(() => new java.sql.Timestamp(rand.nextLong()))
+      case TimestampType => Some { () =>
+        val range = DateTimeUtils.MAX_TIMESTAMP - DateTimeUtils.MIN_TIMESTAMP
+        new java.sql.Timestamp((range * rand.nextDouble()).toLong + DateTimeUtils.MIN_TIMESTAMP)
+      }
       case CalendarIntervalType => Some(() => {
         val months = rand.nextInt(1000)
         val ns = rand.nextLong()
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala
index ee6d25157fc08..af3d58db6b153 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala
@@ -24,6 +24,7 @@ import org.scalatest.Matchers
 import org.scalatest.prop.GeneratorDrivenPropertyChecks
 
 import org.apache.spark.sql.types._
+import org.apache.spark.sql.catalyst.util.DateTimeUtils
 import org.apache.spark.unsafe.types.CalendarInterval
 
 /**
@@ -94,7 +95,9 @@ object LiteralGenerator {
     for { d <- Arbitrary.arbInt.arbitrary } yield Literal.create(new Date(d), DateType)
 
   lazy val timestampLiteralGen: Gen[Literal] =
-    for { t <- Arbitrary.arbLong.arbitrary } yield Literal.create(new Timestamp(t), TimestampType)
+    for { t <- Gen.chooseNum(DateTimeUtils.MIN_TIMESTAMP, DateTimeUtils.MAX_TIMESTAMP) } yield {
+      Literal.create(new Timestamp(t), TimestampType)
+    }
 
   lazy val calendarIntervalLiterGen: Gen[Literal] =
     for { m <- Arbitrary.arbInt.arbitrary; s <- Arbitrary.arbLong.arbitrary}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index 1596bb79fa94b..72c9e0aadcfbf 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -17,6 +17,7 @@
 
 package org.apache.spark.sql.catalyst.util
 
+import java.lang.{Long => JLong}
 import java.sql.{Date, Timestamp}
 import java.text.SimpleDateFormat
 import java.util.{Calendar, TimeZone}
@@ -425,4 +426,34 @@ class DateTimeUtilsSuite extends SparkFunSuite {
     test("2011-12-25 01:00:00.123456", "PST", "2011-12-25 09:00:00.123456")
     test("2011-12-25 17:00:00.123456", "Asia/Shanghai", "2011-12-25 09:00:00.123456")
   }
+
+  test("SPARK-10439: bound checks") {
+    // Avoid truncation when converting from ms to us.
+    Seq(JLong.MIN_VALUE, JLong.MAX_VALUE).foreach { ts =>
+      intercept[IllegalArgumentException] {
+        fromJavaTimestamp(new Timestamp(ts))
+      }
+    }
+
+    // Avoid negative Julian day counts since the Hive/Impala timestamp spec does not
+    // expect them.
+    val julianDay = -(JULIAN_DAY_OF_EPOCH * SECONDS_PER_DAY * MICROS_PER_SECOND)
+    intercept[IllegalArgumentException] {
+      toJulianDay(julianDay - 1)
+    }
+
+    // Make sure calculated nanos are positive, since that's what the Hive/Impala timestamp
+    // spec expects.
+    val (_, nanos) = toJulianDay(julianDay + 1)
+    assert(nanos >= 0)
+
+    // Make sure timestamp in ms does not overflow number of days returned as int. millisToDays()
+    // considers the current time zone, so add 2 full days when checking to be sure.
+    val msPerDay = SECONDS_PER_DAY * 1000L
+    val tooManyDays = Integer.MAX_VALUE * msPerDay
+    intercept[IllegalArgumentException] {
+      millisToDays(tooManyDays + 2 * msPerDay)
+    }
+  }
+
 }

From 63d0c39e94515a2b4ea9d992ea10cca4411b6264 Mon Sep 17 00:00:00 2001
From: Marcelo Vanzin <vanzin@cloudera.com>
Date: Tue, 8 Sep 2015 15:26:32 -0700
Subject: [PATCH 2/8] Revert to previous bounds for timestamp values.

---
 .../spark/sql/catalyst/util/DateTimeUtils.scala      | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 577f13be12a7b..705eb063386ed 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -59,15 +59,9 @@ object DateTimeUtils {
 
   // Constants defining the allowed ranges for timestampts that fit in parquet files. Mostly
   // used by tests to avoid duplication.
-  //
-  // -62135740800000L is the number of milliseconds before January 1, 1970, 00:00:00 GMT
-  // for "0001-01-01 00:00:00.000000". We need to find a
-  // number that is greater or equals to this number as a valid timestamp value.
-  //
-  // 253402329599999L is the the number of milliseconds since
-  // January 1, 1970, 00:00:00 GMT for "9999-12-31 23:59:59.999999".
-  private[spark] final val MIN_TIMESTAMP: Long = -62135740800000L
-  private[spark] final val MAX_TIMESTAMP: Long = 253402329599999L
+  private[spark] final val MIN_TIMESTAMP: Long =
+    -(DateTimeUtils.JULIAN_DAY_OF_EPOCH * DateTimeUtils.SECONDS_PER_DAY * 1000)
+  private[spark] final val MAX_TIMESTAMP: Long = java.lang.Long.MAX_VALUE / 1000
 
   // Java TimeZone has no mention of thread safety. Use thread local instance to be safe.
   private val threadLocalLocalTimeZone = new ThreadLocal[TimeZone] {

From edeb06e25baed588ef6cbba87524cd12e3c814b7 Mon Sep 17 00:00:00 2001
From: Marcelo Vanzin <vanzin@cloudera.com>
Date: Wed, 9 Sep 2015 14:18:19 -0700
Subject: [PATCH 3/8] Revert to previous limits, and explain why they're
 needed.

Note that since Timestamp.valueOf() parses things in the host's time zone,
these limits might be a little bit off for someone living ahead of UTC. Not
sure what the best solution is here (aside from avoiding Timestamp.valueOf).
---
 .../apache/spark/sql/catalyst/util/DateTimeUtils.scala   | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 705eb063386ed..b2963875771fb 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -57,11 +57,12 @@ object DateTimeUtils {
 
   @transient lazy val defaultTimeZone = TimeZone.getDefault
 
-  // Constants defining the allowed ranges for timestampts that fit in parquet files. Mostly
-  // used by tests to avoid duplication.
+  // Constants defining the allowed ranges for timestamps that can be parsed by java.sql.Timestamp.
+  // Limits are calculated based on UTC.
   private[spark] final val MIN_TIMESTAMP: Long =
-    -(DateTimeUtils.JULIAN_DAY_OF_EPOCH * DateTimeUtils.SECONDS_PER_DAY * 1000)
-  private[spark] final val MAX_TIMESTAMP: Long = java.lang.Long.MAX_VALUE / 1000
+    Timestamp.valueOf("0001-01-01 00:00:00").getTime() + defaultTimeZone.getRawOffset()
+  private[spark] final val MAX_TIMESTAMP: Long =
+    Timestamp.valueOf("9999-12-31 23:59:59.999999").getTime() + defaultTimeZone.getRawOffset()
 
   // Java TimeZone has no mention of thread safety. Use thread local instance to be safe.
   private val threadLocalLocalTimeZone = new ThreadLocal[TimeZone] {

From 922c09eda885b30e40ea21306fbafa036b918127 Mon Sep 17 00:00:00 2001
From: Marcelo Vanzin <vanzin@cloudera.com>
Date: Wed, 9 Sep 2015 14:57:34 -0700
Subject: [PATCH 4/8] Remove unneded checks.

New limits make these unnecessary. Tweak tests accordingly.
---
 .../spark/sql/catalyst/util/DateTimeUtils.scala  |  5 +----
 .../sql/catalyst/util/DateTimeUtilsSuite.scala   | 16 +---------------
 2 files changed, 2 insertions(+), 19 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index b2963875771fb..06942472239d7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -90,9 +90,7 @@ object DateTimeUtils {
     // SPARK-6785: use Math.floor so negative number of days (dates before 1970)
     // will correctly work as input for function toJavaDate(Int)
     val millisLocal = millisUtc + threadLocalLocalTimeZone.get().getOffset(millisUtc)
-    val days = Math.floor(millisLocal.toDouble / MILLIS_PER_DAY)
-    require(days <= Integer.MAX_VALUE && days >= Integer.MIN_VALUE, "Date exceeeds allowed range.")
-    days.toInt
+    Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt
   }
 
   // reverse of millisToDays
@@ -212,7 +210,6 @@ object DateTimeUtils {
       day -= 1
       secondsInDay += SECONDS_PER_DAY
       nanos += (SECONDS_PER_DAY * MICROS_PER_SECOND * 1000L)
-      require(day >= 0, "Timestamp exceeds allowed range.")
     }
     (day.toInt, secondsInDay * NANOS_PER_SECOND + nanos)
   }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index 72c9e0aadcfbf..3d131c1af0e94 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -435,25 +435,11 @@ class DateTimeUtilsSuite extends SparkFunSuite {
       }
     }
 
-    // Avoid negative Julian day counts since the Hive/Impala timestamp spec does not
-    // expect them.
-    val julianDay = -(JULIAN_DAY_OF_EPOCH * SECONDS_PER_DAY * MICROS_PER_SECOND)
-    intercept[IllegalArgumentException] {
-      toJulianDay(julianDay - 1)
-    }
-
     // Make sure calculated nanos are positive, since that's what the Hive/Impala timestamp
     // spec expects.
+    val julianDay = -(JULIAN_DAY_OF_EPOCH * SECONDS_PER_DAY * MICROS_PER_SECOND)
     val (_, nanos) = toJulianDay(julianDay + 1)
     assert(nanos >= 0)
-
-    // Make sure timestamp in ms does not overflow number of days returned as int. millisToDays()
-    // considers the current time zone, so add 2 full days when checking to be sure.
-    val msPerDay = SECONDS_PER_DAY * 1000L
-    val tooManyDays = Integer.MAX_VALUE * msPerDay
-    intercept[IllegalArgumentException] {
-      millisToDays(tooManyDays + 2 * msPerDay)
-    }
   }
 
 }

From 3869816a1604b2c6daa058e3a73eeda6c0df1c8b Mon Sep 17 00:00:00 2001
From: Marcelo Vanzin <vanzin@cloudera.com>
Date: Wed, 9 Sep 2015 15:42:29 -0700
Subject: [PATCH 5/8] Fix negative timestamp to julian day ts conversion.

---
 .../sql/catalyst/util/DateTimeUtils.scala     | 23 +++++++++++--------
 .../catalyst/util/DateTimeUtilsSuite.scala    |  9 +++++---
 2 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 06942472239d7..f00c7ea10dc8a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -202,16 +202,19 @@ object DateTimeUtils {
    * Returns Julian day and nanoseconds in a day from the number of microseconds
    */
   def toJulianDay(us: SQLTimestamp): (Int, Long) = {
-    val seconds = us / MICROS_PER_SECOND
-    var day = seconds / SECONDS_PER_DAY + JULIAN_DAY_OF_EPOCH
-    var secondsInDay = seconds % SECONDS_PER_DAY
-    var nanos = (us % MICROS_PER_SECOND) * 1000L
-    if (us < 0) {
-      day -= 1
-      secondsInDay += SECONDS_PER_DAY
-      nanos += (SECONDS_PER_DAY * MICROS_PER_SECOND * 1000L)
-    }
-    (day.toInt, secondsInDay * NANOS_PER_SECOND + nanos)
+    if (us >= 0) {
+      val seconds = us / MICROS_PER_SECOND
+      var day = seconds / SECONDS_PER_DAY + JULIAN_DAY_OF_EPOCH
+      var secondsInDay = seconds % SECONDS_PER_DAY
+      var nanos = (us % MICROS_PER_SECOND) * 1000L
+      (day.toInt, secondsInDay * NANOS_PER_SECOND + nanos)
+    } else {
+      val usPerDay = MICROS_PER_SECOND * SECONDS_PER_DAY
+      val usFromJulianEpoch = us + JULIAN_DAY_OF_EPOCH * usPerDay
+      val day = usFromJulianEpoch / usPerDay
+      val micros = usFromJulianEpoch % usPerDay
+      (day.toInt, micros * 1000L)
+    }
   }
 
   /**
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index 3d131c1af0e94..d981c23a53617 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -435,11 +435,14 @@ class DateTimeUtilsSuite extends SparkFunSuite {
       }
     }
 
-    // Make sure calculated nanos are positive, since that's what the Hive/Impala timestamp
-    // spec expects.
+    // Make sure calculated nanos are positive, since that's what the Hive/Impala timestamp spec
+    // expects. Also make sure it's less than 999999999 (the limit imposed by java.sql.Timestamp).
     val julianDay = -(JULIAN_DAY_OF_EPOCH * SECONDS_PER_DAY * MICROS_PER_SECOND)
-    val (_, nanos) = toJulianDay(julianDay + 1)
+    val nextDay = julianDay + SECONDS_PER_DAY * MICROS_PER_SECOND
+    val (day, nanos) = toJulianDay(nextDay + 1)
+    assert(day === 1)
     assert(nanos >= 0)
+    assert(nanos <= 999999999)
   }
 
 }

From 6064ef23b980036bbf9e8f14d25f03b8ad1a18f0 Mon Sep 17 00:00:00 2001
From: Marcelo Vanzin <vanzin@cloudera.com>
Date: Wed, 9 Sep 2015 15:47:11 -0700
Subject: [PATCH 6/8] Fix timestamp range check, add check to fromJavaDate().

---
 .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala  | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index f00c7ea10dc8a..9583a2226d036 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -147,6 +147,8 @@ object DateTimeUtils {
    * Returns the number of days since epoch from from java.sql.Date.
    */
   def fromJavaDate(date: Date): SQLDate = {
+    require(date.getTime() <= MAX_TIMESTAMP && date.getTime() >= MIN_TIMESTAMP,
+      s"Timestamp exceeds allowed range.")
     millisToDays(date.getTime)
   }
 
@@ -180,8 +182,8 @@ object DateTimeUtils {
    */
   def fromJavaTimestamp(t: Timestamp): SQLTimestamp = {
     if (t != null) {
-      require(t.getTime() <= JLong.MAX_VALUE / 1000 && t.getTime() >= JLong.MIN_VALUE / 1000,
-        s"Timestamp exceeds allowed range (${t.getTime()}).")
+      require(t.getTime() <= MAX_TIMESTAMP && t.getTime() >= MIN_TIMESTAMP,
+        s"Timestamp exceeds allowed range.")
       t.getTime() * 1000L + (t.getNanos().toLong / 1000) % 1000L
     } else {
       0L

From 7a4cf9ea92222e598abbf014c6e5f8c46dec35ce Mon Sep 17 00:00:00 2001
From: Marcelo Vanzin <vanzin@cloudera.com>
Date: Wed, 9 Sep 2015 15:48:46 -0700
Subject: [PATCH 7/8] And a test.

---
 .../apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala  | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index d981c23a53617..dc4b63ae7114f 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -428,11 +428,14 @@ class DateTimeUtilsSuite extends SparkFunSuite {
   }
 
   test("SPARK-10439: bound checks") {
-    // Avoid truncation when converting from ms to us.
+    // Avoid truncation when converting from ms to us. Make sure dates are within allowed range.
     Seq(JLong.MIN_VALUE, JLong.MAX_VALUE).foreach { ts =>
       intercept[IllegalArgumentException] {
         fromJavaTimestamp(new Timestamp(ts))
       }
+      intercept[IllegalArgumentException] {
+        fromJavaDate(new Date(ts))
+      }
     }
 
     // Make sure calculated nanos are positive, since that's what the Hive/Impala timestamp spec

From fa8d6f676489a0e6c8a4a017fbc25b55f0c80808 Mon Sep 17 00:00:00 2001
From: Marcelo Vanzin <vanzin@cloudera.com>
Date: Wed, 9 Sep 2015 15:57:45 -0700
Subject: [PATCH 8/8] Simplify toJulianDay().

---
 .../sql/catalyst/util/DateTimeUtils.scala      | 18 +++++-------------
 .../sql/catalyst/util/DateTimeUtilsSuite.scala |  4 ++--
 2 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 9583a2226d036..38de7df7c2cf3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -42,6 +42,7 @@ object DateTimeUtils {
   final val JULIAN_DAY_OF_EPOCH = 2440588
   final val SECONDS_PER_DAY = 60 * 60 * 24L
   final val MICROS_PER_SECOND = 1000L * 1000L
+  final val MICROS_PER_DAY = MICROS_PER_SECOND * SECONDS_PER_DAY
   final val NANOS_PER_SECOND = MICROS_PER_SECOND * 1000L
 
   final val MILLIS_PER_DAY = SECONDS_PER_DAY * 1000L
@@ -204,19 +205,10 @@ object DateTimeUtils {
    * Returns Julian day and nanoseconds in a day from the number of microseconds
    */
   def toJulianDay(us: SQLTimestamp): (Int, Long) = {
-    if (us >= 0) {
-      val seconds = us / MICROS_PER_SECOND
-      var day = seconds / SECONDS_PER_DAY + JULIAN_DAY_OF_EPOCH
-      var secondsInDay = seconds % SECONDS_PER_DAY
-      var nanos = (us % MICROS_PER_SECOND) * 1000L
-      (day.toInt, secondsInDay * NANOS_PER_SECOND + nanos)
-    } else {
-      val usPerDay = MICROS_PER_SECOND * SECONDS_PER_DAY
-      val usFromJulianEpoch = us + JULIAN_DAY_OF_EPOCH * usPerDay
-      val day = usFromJulianEpoch / usPerDay
-      val micros = usFromJulianEpoch % usPerDay
-      (day.toInt, micros * 1000L)
-    }
+    val usFromJulianEpoch = us + JULIAN_DAY_OF_EPOCH * MICROS_PER_DAY
+    val day = usFromJulianEpoch / MICROS_PER_DAY
+    val micros = usFromJulianEpoch % MICROS_PER_DAY
+    (day.toInt, micros * 1000L)
   }
 
   /**
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index dc4b63ae7114f..37dbfb97773f9 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -440,8 +440,8 @@ class DateTimeUtilsSuite extends SparkFunSuite {
 
     // Make sure calculated nanos are positive, since that's what the Hive/Impala timestamp spec
     // expects. Also make sure it's less than 999999999 (the limit imposed by java.sql.Timestamp).
-    val julianDay = -(JULIAN_DAY_OF_EPOCH * SECONDS_PER_DAY * MICROS_PER_SECOND)
-    val nextDay = julianDay + SECONDS_PER_DAY * MICROS_PER_SECOND
+    val julianDay = -(JULIAN_DAY_OF_EPOCH * MICROS_PER_DAY)
+    val nextDay = julianDay + MICROS_PER_DAY
     val (day, nanos) = toJulianDay(nextDay + 1)
     assert(day === 1)
     assert(nanos >= 0)