spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wenc...@apache.org
Subject spark git commit: [SPARK-15379][SQL] check special invalid date
Date Mon, 23 May 2016 02:30:38 GMT
Repository: spark
Updated Branches:
  refs/heads/master 3eff65f82 -> fc44b694b


[SPARK-15379][SQL] check special invalid date

## What changes were proposed in this pull request?

When an invalid date string such as "2015-02-29 00:00:00" is cast to a date or timestamp using
Spark SQL, the result used to be another valid date (2015-03-01 in this case) instead of null.
In this PR, invalid date strings such as "2015-02-29" and "2016-04-31" are returned as null when
cast to a date or timestamp.

## How was this patch tested?

Unit tests are added.

(If this patch involves UI changes, please attach a screenshot; otherwise, remove this)

Author: wangyang <wangyang@haizhi.com>

Closes #13169 from wangyang1992/invalid_date.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fc44b694
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fc44b694
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fc44b694

Branch: refs/heads/master
Commit: fc44b694bf5162b3a044768da4627b9969909829
Parents: 3eff65f
Author: wangyang <wangyang@haizhi.com>
Authored: Sun May 22 19:30:14 2016 -0700
Committer: Wenchen Fan <wenchen@databricks.com>
Committed: Sun May 22 19:30:14 2016 -0700

----------------------------------------------------------------------
 .../spark/sql/catalyst/util/DateTimeUtils.scala | 27 +++++++++++++++++---
 .../sql/catalyst/util/DateTimeUtilsSuite.scala  | 19 ++++++++++++++
 2 files changed, 42 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/fc44b694/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index f84c659..e08328a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -58,6 +58,7 @@ object DateTimeUtils {
   final val YearZero = -17999
   final val toYearZero = to2001 + 7304850
   final val TimeZoneGMT = TimeZone.getTimeZone("GMT")
+  final val MonthOf31Days = Set(1, 3, 5, 7, 8, 10, 12)
 
   @transient lazy val defaultTimeZone = TimeZone.getDefault
 
@@ -333,8 +334,7 @@ object DateTimeUtils {
       digitsMilli += 1
     }
 
-    if (!justTime && (segments(0) < 0 || segments(0) > 9999 || segments(1) < 1 ||
-        segments(1) > 12 || segments(2) < 1 || segments(2) > 31)) {
+    if (!justTime && isInvalidDate(segments(0), segments(1), segments(2))) {
       return None
     }
 
@@ -414,10 +414,10 @@ object DateTimeUtils {
       return None
     }
     segments(i) = currentSegmentValue
-    if (segments(0) < 0 || segments(0) > 9999 || segments(1) < 1 || segments(1) > 12 ||
-        segments(2) < 1 || segments(2) > 31) {
+    if (isInvalidDate(segments(0), segments(1), segments(2))) {
       return None
     }
+
     val c = threadLocalGmtCalendar.get()
     c.clear()
     c.set(segments(0), segments(1) - 1, segments(2), 0, 0, 0)
@@ -426,6 +426,25 @@ object DateTimeUtils {
   }
 
   /**
+   * Return true if the date is invalid.
+   */
+  private def isInvalidDate(year: Int, month: Int, day: Int): Boolean = {
+    if (year < 0 || year > 9999 || month < 1 || month > 12 || day < 1 || day > 31) {
+      return true
+    }
+    if (month == 2) {
+      if (isLeapYear(year) && day > 29) {
+        return true
+      } else if (!isLeapYear(year) && day > 28) {
+        return true
+      }
+    } else if (!MonthOf31Days.contains(month) && day > 30) {
+      return true
+    }
+    false
+  }
+
+  /**
    * Returns the microseconds since year zero (-17999) from microseconds since epoch.
    */
   private def absoluteMicroSecond(microsec: SQLTimestamp): SQLTimestamp = {

http://git-wip-us.apache.org/repos/asf/spark/blob/fc44b694/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index 6745b4b..28e30c2 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -353,6 +353,25 @@ class DateTimeUtilsSuite extends SparkFunSuite {
         c.getTimeInMillis * 1000 + 123456)
   }
 
+  test("SPARK-15379: special invalid date string") {
+    // Test stringToDate
+    assert(stringToDate(
+      UTF8String.fromString("2015-02-29 00:00:00")).isEmpty)
+    assert(stringToDate(
+      UTF8String.fromString("2015-04-31 00:00:00")).isEmpty)
+    assert(stringToDate(UTF8String.fromString("2015-02-29")).isEmpty)
+    assert(stringToDate(UTF8String.fromString("2015-04-31")).isEmpty)
+
+
+    // Test stringToTimestamp
+    assert(stringToTimestamp(
+      UTF8String.fromString("2015-02-29 00:00:00")).isEmpty)
+    assert(stringToTimestamp(
+      UTF8String.fromString("2015-04-31 00:00:00")).isEmpty)
+    assert(stringToTimestamp(UTF8String.fromString("2015-02-29")).isEmpty)
+    assert(stringToTimestamp(UTF8String.fromString("2015-04-31")).isEmpty)
+  }
+
   test("hours") {
     val c = Calendar.getInstance()
     c.set(2015, 2, 18, 13, 2, 11)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message