From ffe39e4a1c31f606caeed3f3e193e9945b7e8923 Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin Date: Fri, 4 Sep 2015 11:30:50 -0700 Subject: [PATCH 1/8] [SPARK-10439] [sql] Add bound checks to DateTimeUtils. There were a couple of places where Spark SQL would silently truncate data if certain timestamps were provided. In a couple of other places, the way to calculate Julian day-based timestamps was changed a little so that Spark writes data that is friendlier to Hive; mostly, Hive does not like very much when the data has negative values for either the days or nanos part, so avoid those. The values that trigger these use cases are very uncommon (very large values in either end of the spectrum), so this shouldn't really affect any existing applications. --- .../sql/catalyst/util/DateTimeUtils.scala | 25 ++++++++++++--- .../spark/sql/RandomDataGenerator.scala | 6 +++- .../expressions/LiteralGenerator.scala | 5 ++- .../catalyst/util/DateTimeUtilsSuite.scala | 31 +++++++++++++++++++ 4 files changed, 61 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index d652fce3fd9b6..705eb063386ed 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.util +import java.lang.{Long => JLong} import java.sql.{Date, Timestamp} import java.text.{DateFormat, SimpleDateFormat} import java.util.{TimeZone, Calendar} @@ -56,6 +57,12 @@ object DateTimeUtils { @transient lazy val defaultTimeZone = TimeZone.getDefault + // Constants defining the allowed ranges for timestampts that fit in parquet files. Mostly + // used by tests to avoid duplication. + private[spark] final val MIN_TIMESTAMP: Long = + -(DateTimeUtils.JULIAN_DAY_OF_EPOCH * DateTimeUtils.SECONDS_PER_DAY * 1000) + private[spark] final val MAX_TIMESTAMP: Long = java.lang.Long.MAX_VALUE / 1000 + // Java TimeZone has no mention of thread safety. Use thread local instance to be safe. private val threadLocalLocalTimeZone = new ThreadLocal[TimeZone] { override protected def initialValue: TimeZone = { @@ -82,7 +89,9 @@ object DateTimeUtils { // SPARK-6785: use Math.floor so negative number of days (dates before 1970) // will correctly work as input for function toJavaDate(Int) val millisLocal = millisUtc + threadLocalLocalTimeZone.get().getOffset(millisUtc) - Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt + val days = Math.floor(millisLocal.toDouble / MILLIS_PER_DAY) + require(days <= Integer.MAX_VALUE && days >= Integer.MIN_VALUE, "Date exceeeds allowed range.") + days.toInt } // reverse of millisToDays @@ -172,6 +181,8 @@ object DateTimeUtils { */ def fromJavaTimestamp(t: Timestamp): SQLTimestamp = { if (t != null) { + require(t.getTime() <= JLong.MAX_VALUE / 1000 && t.getTime() >= JLong.MIN_VALUE / 1000, + s"Timestamp exceeds allowed range (${t.getTime()}).") t.getTime() * 1000L + (t.getNanos().toLong / 1000) % 1000L } else { 0L @@ -193,9 +204,15 @@ object DateTimeUtils { */ def toJulianDay(us: SQLTimestamp): (Int, Long) = { val seconds = us / MICROS_PER_SECOND - val day = seconds / SECONDS_PER_DAY + JULIAN_DAY_OF_EPOCH - val secondsInDay = seconds % SECONDS_PER_DAY - val nanos = (us % MICROS_PER_SECOND) * 1000L + var day = seconds / SECONDS_PER_DAY + JULIAN_DAY_OF_EPOCH + var secondsInDay = seconds % SECONDS_PER_DAY + var nanos = (us % MICROS_PER_SECOND) * 1000L + if (us < 0) { + day -= 1 + secondsInDay += SECONDS_PER_DAY + nanos += (SECONDS_PER_DAY * MICROS_PER_SECOND * 1000L) + require(day >= 0, "Timestamp exceeds allowed range.") + } (day.toInt, secondsInDay * NANOS_PER_SECOND + nanos) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala index 11e0c120f4072..a6a7bd038ad1e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala @@ -24,6 +24,7 @@ import java.math.MathContext import scala.util.Random import org.apache.spark.sql.types._ +import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.unsafe.types.CalendarInterval /** @@ -106,7 +107,10 @@ object RandomDataGenerator { }) case BooleanType => Some(() => rand.nextBoolean()) case DateType => Some(() => new java.sql.Date(rand.nextInt())) - case TimestampType => Some(() => new java.sql.Timestamp(rand.nextLong())) + case TimestampType => Some { () => + val range = DateTimeUtils.MAX_TIMESTAMP - DateTimeUtils.MIN_TIMESTAMP + new java.sql.Timestamp((range * rand.nextDouble()).toLong + DateTimeUtils.MIN_TIMESTAMP) + } case CalendarIntervalType => Some(() => { val months = rand.nextInt(1000) val ns = rand.nextLong() diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala index ee6d25157fc08..af3d58db6b153 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala @@ -24,6 +24,7 @@ import org.scalatest.Matchers import org.scalatest.prop.GeneratorDrivenPropertyChecks import org.apache.spark.sql.types._ +import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.unsafe.types.CalendarInterval /** @@ -94,7 +95,9 @@ object LiteralGenerator { for { d <- Arbitrary.arbInt.arbitrary } yield Literal.create(new Date(d), DateType) lazy val timestampLiteralGen: Gen[Literal] = - for { t <- Arbitrary.arbLong.arbitrary } yield Literal.create(new Timestamp(t), TimestampType) + for { t <- Gen.chooseNum(DateTimeUtils.MIN_TIMESTAMP, DateTimeUtils.MAX_TIMESTAMP) } yield { + Literal.create(new Timestamp(t), TimestampType) + } lazy val calendarIntervalLiterGen: Gen[Literal] = for { m <- Arbitrary.arbInt.arbitrary; s <- Arbitrary.arbLong.arbitrary} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 1596bb79fa94b..72c9e0aadcfbf 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.util +import java.lang.{Long => JLong} import java.sql.{Date, Timestamp} import java.text.SimpleDateFormat import java.util.{Calendar, TimeZone} @@ -425,4 +426,34 @@ class DateTimeUtilsSuite extends SparkFunSuite { test("2011-12-25 01:00:00.123456", "PST", "2011-12-25 09:00:00.123456") test("2011-12-25 17:00:00.123456", "Asia/Shanghai", "2011-12-25 09:00:00.123456") } + + test("SPARK-10439: bound checks") { + // Avoid truncation when converting from ms to us. + Seq(JLong.MIN_VALUE, JLong.MAX_VALUE).foreach { ts => + intercept[IllegalArgumentException] { + fromJavaTimestamp(new Timestamp(ts)) + } + } + + // Avoid negative Julian day counts since the Hive/Impala timestamp spec does not + // expect them. + val julianDay = -(JULIAN_DAY_OF_EPOCH * SECONDS_PER_DAY * MICROS_PER_SECOND) + intercept[IllegalArgumentException] { + toJulianDay(julianDay - 1) + } + + // Make sure calculated nanos are positive, since that's what the Hive/Impala timestamp + // spec expects. + val (_, nanos) = toJulianDay(julianDay + 1) + assert(nanos >= 0) + + // Make sure timestamp in ms does not overflow number of days returned as int. millisToDays() + // considers the current time zone, so add 2 full days when checking to be sure. + val msPerDay = SECONDS_PER_DAY * 1000L + val tooManyDays = Integer.MAX_VALUE * msPerDay + intercept[IllegalArgumentException] { + millisToDays(tooManyDays + 2 * msPerDay) + } + } + } From 63d0c39e94515a2b4ea9d992ea10cca4411b6264 Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin Date: Tue, 8 Sep 2015 15:26:32 -0700 Subject: [PATCH 2/8] Revert to previous bounds for timestamp values. --- .../spark/sql/catalyst/util/DateTimeUtils.scala | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 577f13be12a7b..705eb063386ed 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -59,15 +59,9 @@ object DateTimeUtils { // Constants defining the allowed ranges for timestampts that fit in parquet files. Mostly // used by tests to avoid duplication. - // - // -62135740800000L is the number of milliseconds before January 1, 1970, 00:00:00 GMT - // for "0001-01-01 00:00:00.000000". We need to find a - // number that is greater or equals to this number as a valid timestamp value. - // - // 253402329599999L is the the number of milliseconds since - // January 1, 1970, 00:00:00 GMT for "9999-12-31 23:59:59.999999". - private[spark] final val MIN_TIMESTAMP: Long = -62135740800000L - private[spark] final val MAX_TIMESTAMP: Long = 253402329599999L + private[spark] final val MIN_TIMESTAMP: Long = + -(DateTimeUtils.JULIAN_DAY_OF_EPOCH * DateTimeUtils.SECONDS_PER_DAY * 1000) + private[spark] final val MAX_TIMESTAMP: Long = java.lang.Long.MAX_VALUE / 1000 // Java TimeZone has no mention of thread safety. Use thread local instance to be safe. private val threadLocalLocalTimeZone = new ThreadLocal[TimeZone] { From edeb06e25baed588ef6cbba87524cd12e3c814b7 Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin Date: Wed, 9 Sep 2015 14:18:19 -0700 Subject: [PATCH 3/8] Revert to previous limits, and explain why they're needed. Note that since Timestamp.valueOf() parses things in the host's time zone, these limits might be a little bit off for someone living ahead of UTC. Not sure what the best solution is here (aside from avoiding Timestamp.valueOf). --- .../apache/spark/sql/catalyst/util/DateTimeUtils.scala | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 705eb063386ed..b2963875771fb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -57,11 +57,12 @@ object DateTimeUtils { @transient lazy val defaultTimeZone = TimeZone.getDefault - // Constants defining the allowed ranges for timestampts that fit in parquet files. Mostly - // used by tests to avoid duplication. + // Constants defining the allowed ranges for timestamps that can be parsed by java.sql.Timestamp. + // Limits are calculated based on UTC. private[spark] final val MIN_TIMESTAMP: Long = - -(DateTimeUtils.JULIAN_DAY_OF_EPOCH * DateTimeUtils.SECONDS_PER_DAY * 1000) - private[spark] final val MAX_TIMESTAMP: Long = java.lang.Long.MAX_VALUE / 1000 + Timestamp.valueOf("0001-01-01 00:00:00").getTime() + defaultTimeZone.getRawOffset() + private[spark] final val MAX_TIMESTAMP: Long = + Timestamp.valueOf("9999-12-31 23:59:59.999999").getTime() + defaultTimeZone.getRawOffset() // Java TimeZone has no mention of thread safety. Use thread local instance to be safe. private val threadLocalLocalTimeZone = new ThreadLocal[TimeZone] { From 922c09eda885b30e40ea21306fbafa036b918127 Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin Date: Wed, 9 Sep 2015 14:57:34 -0700 Subject: [PATCH 4/8] Remove unneded checks. New limits make these unnecessary. Tweak tests accordingly. --- .../spark/sql/catalyst/util/DateTimeUtils.scala | 5 +---- .../sql/catalyst/util/DateTimeUtilsSuite.scala | 16 +--------------- 2 files changed, 2 insertions(+), 19 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index b2963875771fb..06942472239d7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -90,9 +90,7 @@ object DateTimeUtils { // SPARK-6785: use Math.floor so negative number of days (dates before 1970) // will correctly work as input for function toJavaDate(Int) val millisLocal = millisUtc + threadLocalLocalTimeZone.get().getOffset(millisUtc) - val days = Math.floor(millisLocal.toDouble / MILLIS_PER_DAY) - require(days <= Integer.MAX_VALUE && days >= Integer.MIN_VALUE, "Date exceeeds allowed range.") - days.toInt + Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt } // reverse of millisToDays @@ -212,7 +210,6 @@ object DateTimeUtils { day -= 1 secondsInDay += SECONDS_PER_DAY nanos += (SECONDS_PER_DAY * MICROS_PER_SECOND * 1000L) - require(day >= 0, "Timestamp exceeds allowed range.") } (day.toInt, secondsInDay * NANOS_PER_SECOND + nanos) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 72c9e0aadcfbf..3d131c1af0e94 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -435,25 +435,11 @@ class DateTimeUtilsSuite extends SparkFunSuite { } } - // Avoid negative Julian day counts since the Hive/Impala timestamp spec does not - // expect them. - val julianDay = -(JULIAN_DAY_OF_EPOCH * SECONDS_PER_DAY * MICROS_PER_SECOND) - intercept[IllegalArgumentException] { - toJulianDay(julianDay - 1) - } - // Make sure calculated nanos are positive, since that's what the Hive/Impala timestamp // spec expects. + val julianDay = -(JULIAN_DAY_OF_EPOCH * SECONDS_PER_DAY * MICROS_PER_SECOND) val (_, nanos) = toJulianDay(julianDay + 1) assert(nanos >= 0) - - // Make sure timestamp in ms does not overflow number of days returned as int. millisToDays() - // considers the current time zone, so add 2 full days when checking to be sure. - val msPerDay = SECONDS_PER_DAY * 1000L - val tooManyDays = Integer.MAX_VALUE * msPerDay - intercept[IllegalArgumentException] { - millisToDays(tooManyDays + 2 * msPerDay) - } } } From 3869816a1604b2c6daa058e3a73eeda6c0df1c8b Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin Date: Wed, 9 Sep 2015 15:42:29 -0700 Subject: [PATCH 5/8] Fix negative timestamp to julian day ts conversion. --- .../sql/catalyst/util/DateTimeUtils.scala | 23 +++++++++++-------- .../catalyst/util/DateTimeUtilsSuite.scala | 9 +++++--- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 06942472239d7..f00c7ea10dc8a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -202,16 +202,19 @@ object DateTimeUtils { * Returns Julian day and nanoseconds in a day from the number of microseconds */ def toJulianDay(us: SQLTimestamp): (Int, Long) = { - val seconds = us / MICROS_PER_SECOND - var day = seconds / SECONDS_PER_DAY + JULIAN_DAY_OF_EPOCH - var secondsInDay = seconds % SECONDS_PER_DAY - var nanos = (us % MICROS_PER_SECOND) * 1000L - if (us < 0) { - day -= 1 - secondsInDay += SECONDS_PER_DAY - nanos += (SECONDS_PER_DAY * MICROS_PER_SECOND * 1000L) - } - (day.toInt, secondsInDay * NANOS_PER_SECOND + nanos) + if (us >= 0) { + val seconds = us / MICROS_PER_SECOND + var day = seconds / SECONDS_PER_DAY + JULIAN_DAY_OF_EPOCH + var secondsInDay = seconds % SECONDS_PER_DAY + var nanos = (us % MICROS_PER_SECOND) * 1000L + (day.toInt, secondsInDay * NANOS_PER_SECOND + nanos) + } else { + val usPerDay = MICROS_PER_SECOND * SECONDS_PER_DAY + val usFromJulianEpoch = us + JULIAN_DAY_OF_EPOCH * usPerDay + val day = usFromJulianEpoch / usPerDay + val micros = usFromJulianEpoch % usPerDay + (day.toInt, micros * 1000L) + } } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 3d131c1af0e94..d981c23a53617 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -435,11 +435,14 @@ class DateTimeUtilsSuite extends SparkFunSuite { } } - // Make sure calculated nanos are positive, since that's what the Hive/Impala timestamp - // spec expects. + // Make sure calculated nanos are positive, since that's what the Hive/Impala timestamp spec + // expects. Also make sure it's less than 999999999 (the limit imposed by java.sql.Timestamp). val julianDay = -(JULIAN_DAY_OF_EPOCH * SECONDS_PER_DAY * MICROS_PER_SECOND) - val (_, nanos) = toJulianDay(julianDay + 1) + val nextDay = julianDay + SECONDS_PER_DAY * MICROS_PER_SECOND + val (day, nanos) = toJulianDay(nextDay + 1) + assert(day === 1) assert(nanos >= 0) + assert(nanos <= 999999999) } } From 6064ef23b980036bbf9e8f14d25f03b8ad1a18f0 Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin Date: Wed, 9 Sep 2015 15:47:11 -0700 Subject: [PATCH 6/8] Fix timestamp range check, add check to fromJavaDate(). --- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index f00c7ea10dc8a..9583a2226d036 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -147,6 +147,8 @@ object DateTimeUtils { * Returns the number of days since epoch from from java.sql.Date. */ def fromJavaDate(date: Date): SQLDate = { + require(date.getTime() <= MAX_TIMESTAMP && date.getTime() >= MIN_TIMESTAMP, + s"Timestamp exceeds allowed range.") millisToDays(date.getTime) } @@ -180,8 +182,8 @@ object DateTimeUtils { */ def fromJavaTimestamp(t: Timestamp): SQLTimestamp = { if (t != null) { - require(t.getTime() <= JLong.MAX_VALUE / 1000 && t.getTime() >= JLong.MIN_VALUE / 1000, - s"Timestamp exceeds allowed range (${t.getTime()}).") + require(t.getTime() <= MAX_TIMESTAMP && t.getTime() >= MIN_TIMESTAMP, + s"Timestamp exceeds allowed range.") t.getTime() * 1000L + (t.getNanos().toLong / 1000) % 1000L } else { 0L From 7a4cf9ea92222e598abbf014c6e5f8c46dec35ce Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin Date: Wed, 9 Sep 2015 15:48:46 -0700 Subject: [PATCH 7/8] And a test. --- .../apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index d981c23a53617..dc4b63ae7114f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -428,11 +428,14 @@ class DateTimeUtilsSuite extends SparkFunSuite { } test("SPARK-10439: bound checks") { - // Avoid truncation when converting from ms to us. + // Avoid truncation when converting from ms to us. Make sure dates are within allowed range. Seq(JLong.MIN_VALUE, JLong.MAX_VALUE).foreach { ts => intercept[IllegalArgumentException] { fromJavaTimestamp(new Timestamp(ts)) } + intercept[IllegalArgumentException] { + fromJavaDate(new Date(ts)) + } } // Make sure calculated nanos are positive, since that's what the Hive/Impala timestamp spec From fa8d6f676489a0e6c8a4a017fbc25b55f0c80808 Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin Date: Wed, 9 Sep 2015 15:57:45 -0700 Subject: [PATCH 8/8] Simplify toJulianDay(). --- .../sql/catalyst/util/DateTimeUtils.scala | 18 +++++------------- .../sql/catalyst/util/DateTimeUtilsSuite.scala | 4 ++-- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 9583a2226d036..38de7df7c2cf3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -42,6 +42,7 @@ object DateTimeUtils { final val JULIAN_DAY_OF_EPOCH = 2440588 final val SECONDS_PER_DAY = 60 * 60 * 24L final val MICROS_PER_SECOND = 1000L * 1000L + final val MICROS_PER_DAY = MICROS_PER_SECOND * SECONDS_PER_DAY final val NANOS_PER_SECOND = MICROS_PER_SECOND * 1000L final val MILLIS_PER_DAY = SECONDS_PER_DAY * 1000L @@ -204,19 +205,10 @@ object DateTimeUtils { * Returns Julian day and nanoseconds in a day from the number of microseconds */ def toJulianDay(us: SQLTimestamp): (Int, Long) = { - if (us >= 0) { - val seconds = us / MICROS_PER_SECOND - var day = seconds / SECONDS_PER_DAY + JULIAN_DAY_OF_EPOCH - var secondsInDay = seconds % SECONDS_PER_DAY - var nanos = (us % MICROS_PER_SECOND) * 1000L - (day.toInt, secondsInDay * NANOS_PER_SECOND + nanos) - } else { - val usPerDay = MICROS_PER_SECOND * SECONDS_PER_DAY - val usFromJulianEpoch = us + JULIAN_DAY_OF_EPOCH * usPerDay - val day = usFromJulianEpoch / usPerDay - val micros = usFromJulianEpoch % usPerDay - (day.toInt, micros * 1000L) - } + val usFromJulianEpoch = us + JULIAN_DAY_OF_EPOCH * MICROS_PER_DAY + val day = usFromJulianEpoch / MICROS_PER_DAY + val micros = usFromJulianEpoch % MICROS_PER_DAY + (day.toInt, micros * 1000L) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index dc4b63ae7114f..37dbfb97773f9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -440,8 +440,8 @@ class DateTimeUtilsSuite extends SparkFunSuite { // Make sure calculated nanos are positive, since that's what the Hive/Impala timestamp spec // expects. Also make sure it's less than 999999999 (the limit imposed by java.sql.Timestamp). - val julianDay = -(JULIAN_DAY_OF_EPOCH * SECONDS_PER_DAY * MICROS_PER_SECOND) - val nextDay = julianDay + SECONDS_PER_DAY * MICROS_PER_SECOND + val julianDay = -(JULIAN_DAY_OF_EPOCH * MICROS_PER_DAY) + val nextDay = julianDay + MICROS_PER_DAY val (day, nanos) = toJulianDay(nextDay + 1) assert(day === 1) assert(nanos >= 0)