spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From r...@apache.org
Subject spark git commit: [SPARK-9794] [SQL] Fix datetime parsing in SparkSQL.
Date Wed, 16 Sep 2015 22:30:21 GMT
Repository: spark
Updated Branches:
  refs/heads/master 896edb51a -> d39f15ea2


[SPARK-9794] [SQL] Fix datetime parsing in SparkSQL.

This fixes https://issues.apache.org/jira/browse/SPARK-9794 by using a real ISO8601 parser.
(courtesy of the xml component of the standard java library)

cc: angelini

Author: Kevin Cox <kevincox@kevincox.ca>

Closes #8396 from kevincox/kevincox-sql-time-parsing.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d39f15ea
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d39f15ea
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d39f15ea

Branch: refs/heads/master
Commit: d39f15ea2b8bed5342d2f8e3c1936f915c470783
Parents: 896edb5
Author: Kevin Cox <kevincox@kevincox.ca>
Authored: Wed Sep 16 15:30:17 2015 -0700
Committer: Reynold Xin <rxin@databricks.com>
Committed: Wed Sep 16 15:30:17 2015 -0700

----------------------------------------------------------------------
 .../spark/sql/catalyst/util/DateTimeUtils.scala | 27 ++++++-----------
 .../sql/catalyst/util/DateTimeUtilsSuite.scala  | 32 ++++++++++++++++++++
 2 files changed, 42 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/d39f15ea/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 687ca00..400c432 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.util
 import java.sql.{Date, Timestamp}
 import java.text.{DateFormat, SimpleDateFormat}
 import java.util.{TimeZone, Calendar}
+import javax.xml.bind.DatatypeConverter;
 
 import org.apache.spark.unsafe.types.UTF8String
 
@@ -109,30 +110,22 @@ object DateTimeUtils {
   }
 
   def stringToTime(s: String): java.util.Date = {
-    if (!s.contains('T')) {
+    var indexOfGMT = s.indexOf("GMT");
+    if (indexOfGMT != -1) {
+      // ISO8601 with a weird time zone specifier (2000-01-01T00:00GMT+01:00)
+      val s0 = s.substring(0, indexOfGMT)
+      val s1 = s.substring(indexOfGMT + 3)
+      // Mapped to 2000-01-01T00:00+01:00
+      stringToTime(s0 + s1)
+    } else if (!s.contains('T')) {
       // JDBC escape string
       if (s.contains(' ')) {
         Timestamp.valueOf(s)
       } else {
         Date.valueOf(s)
       }
-    } else if (s.endsWith("Z")) {
-      // this is zero timezone of ISO8601
-      stringToTime(s.substring(0, s.length - 1) + "GMT-00:00")
-    } else if (s.indexOf("GMT") == -1) {
-      // timezone with ISO8601
-      val inset = "+00.00".length
-      val s0 = s.substring(0, s.length - inset)
-      val s1 = s.substring(s.length - inset, s.length)
-      if (s0.substring(s0.lastIndexOf(':')).contains('.')) {
-        stringToTime(s0 + "GMT" + s1)
-      } else {
-        stringToTime(s0 + ".0GMT" + s1)
-      }
     } else {
-      // ISO8601 with GMT insert
-      val ISO8601GMT: SimpleDateFormat = new SimpleDateFormat( "yyyy-MM-dd'T'HH:mm:ss.SSSz"
)
-      ISO8601GMT.parse(s)
+      DatatypeConverter.parseDateTime(s).getTime()
     }
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/d39f15ea/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index 6b9a11f..4633594 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -136,6 +136,38 @@ class DateTimeUtilsSuite extends SparkFunSuite {
     assert(stringToDate(UTF8String.fromString("2015-031-8")).isEmpty)
   }
 
+  test("string to time") {
+    // Tests with UTC.
+    var c = Calendar.getInstance(TimeZone.getTimeZone("UTC"))
+    c.set(Calendar.MILLISECOND, 0)
+
+    c.set(1900, 0, 1, 0, 0, 0)
+    assert(stringToTime("1900-01-01T00:00:00GMT-00:00") === c.getTime())
+
+    c.set(2000, 11, 30, 10, 0, 0)
+    assert(stringToTime("2000-12-30T10:00:00Z") === c.getTime())
+
+    // Tests with set time zone.
+    c.setTimeZone(TimeZone.getTimeZone("GMT-04:00"))
+    c.set(Calendar.MILLISECOND, 0)
+
+    c.set(1900, 0, 1, 0, 0, 0)
+    assert(stringToTime("1900-01-01T00:00:00-04:00") === c.getTime())
+
+    c.set(1900, 0, 1, 0, 0, 0)
+    assert(stringToTime("1900-01-01T00:00:00GMT-04:00") === c.getTime())
+
+    // Tests with local time zone.
+    c.setTimeZone(TimeZone.getDefault())
+    c.set(Calendar.MILLISECOND, 0)
+
+    c.set(2000, 11, 30, 0, 0, 0)
+    assert(stringToTime("2000-12-30") === new Date(c.getTimeInMillis()))
+
+    c.set(2000, 11, 30, 10, 0, 0)
+    assert(stringToTime("2000-12-30 10:00:00") === new Timestamp(c.getTimeInMillis()))
+  }
+
   test("string to timestamp") {
     var c = Calendar.getInstance()
     c.set(1969, 11, 31, 16, 0, 0)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message