spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] [spark] MaxGekk commented on a change in pull request #27524: [WIP][SQL] Support `SimpleDateFormat` and `FastDateFormat` as legacy date/timestamp formatters
Date Mon, 10 Feb 2020 16:16:35 GMT
MaxGekk commented on a change in pull request #27524: [WIP][SQL] Support `SimpleDateFormat`
and `FastDateFormat` as legacy date/timestamp formatters
URL: https://github.com/apache/spark/pull/27524#discussion_r377164426
 
 

 ##########
 File path: sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
 ##########
 @@ -525,170 +529,194 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession {
   }
 
   test("from_unixtime") {
-    val sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US)
-    val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS"
-    val sdf2 = new SimpleDateFormat(fmt2, Locale.US)
-    val fmt3 = "yy-MM-dd HH-mm-ss"
-    val sdf3 = new SimpleDateFormat(fmt3, Locale.US)
-    val df = Seq((1000, "yyyy-MM-dd HH:mm:ss.SSS"), (-1000, "yy-MM-dd HH-mm-ss")).toDF("a", "b")
-    checkAnswer(
-      df.select(from_unixtime(col("a"))),
-      Seq(Row(sdf1.format(new Timestamp(1000000))), Row(sdf1.format(new Timestamp(-1000000)))))
-    checkAnswer(
-      df.select(from_unixtime(col("a"), fmt2)),
-      Seq(Row(sdf2.format(new Timestamp(1000000))), Row(sdf2.format(new Timestamp(-1000000)))))
-    checkAnswer(
-      df.select(from_unixtime(col("a"), fmt3)),
-      Seq(Row(sdf3.format(new Timestamp(1000000))), Row(sdf3.format(new Timestamp(-1000000)))))
-    checkAnswer(
-      df.selectExpr("from_unixtime(a)"),
-      Seq(Row(sdf1.format(new Timestamp(1000000))), Row(sdf1.format(new Timestamp(-1000000)))))
-    checkAnswer(
-      df.selectExpr(s"from_unixtime(a, '$fmt2')"),
-      Seq(Row(sdf2.format(new Timestamp(1000000))), Row(sdf2.format(new Timestamp(-1000000)))))
-    checkAnswer(
-      df.selectExpr(s"from_unixtime(a, '$fmt3')"),
-      Seq(Row(sdf3.format(new Timestamp(1000000))), Row(sdf3.format(new Timestamp(-1000000)))))
+    Seq(false, true).foreach { legacyParser =>
+      withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) {
+        val sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US)
+        val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS"
+        val sdf2 = new SimpleDateFormat(fmt2, Locale.US)
+        val fmt3 = "yy-MM-dd HH-mm-ss"
+        val sdf3 = new SimpleDateFormat(fmt3, Locale.US)
+        val df = Seq((1000, "yyyy-MM-dd HH:mm:ss.SSS"), (-1000, "yy-MM-dd HH-mm-ss")).toDF("a", "b")
+        checkAnswer(
+          df.select(from_unixtime(col("a"))),
+          Seq(Row(sdf1.format(new Timestamp(1000000))), Row(sdf1.format(new Timestamp(-1000000)))))
+        checkAnswer(
+          df.select(from_unixtime(col("a"), fmt2)),
+          Seq(Row(sdf2.format(new Timestamp(1000000))), Row(sdf2.format(new Timestamp(-1000000)))))
+        checkAnswer(
+          df.select(from_unixtime(col("a"), fmt3)),
+          Seq(Row(sdf3.format(new Timestamp(1000000))), Row(sdf3.format(new Timestamp(-1000000)))))
+        checkAnswer(
+          df.selectExpr("from_unixtime(a)"),
+          Seq(Row(sdf1.format(new Timestamp(1000000))), Row(sdf1.format(new Timestamp(-1000000)))))
+        checkAnswer(
+          df.selectExpr(s"from_unixtime(a, '$fmt2')"),
+          Seq(Row(sdf2.format(new Timestamp(1000000))), Row(sdf2.format(new Timestamp(-1000000)))))
+        checkAnswer(
+          df.selectExpr(s"from_unixtime(a, '$fmt3')"),
+          Seq(Row(sdf3.format(new Timestamp(1000000))), Row(sdf3.format(new Timestamp(-1000000)))))
+      }
+    }
   }
 
   private def secs(millis: Long): Long = TimeUnit.MILLISECONDS.toSeconds(millis)
 
   test("unix_timestamp") {
-    val date1 = Date.valueOf("2015-07-24")
-    val date2 = Date.valueOf("2015-07-25")
-    val ts1 = Timestamp.valueOf("2015-07-24 10:00:00.3")
-    val ts2 = Timestamp.valueOf("2015-07-25 02:02:02.2")
-    val s1 = "2015/07/24 10:00:00.5"
-    val s2 = "2015/07/25 02:02:02.6"
-    val ss1 = "2015-07-24 10:00:00"
-    val ss2 = "2015-07-25 02:02:02"
-    val fmt = "yyyy/MM/dd HH:mm:ss.S"
-    val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss")
-    checkAnswer(df.select(unix_timestamp(col("ts"))), Seq(
-      Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
-    checkAnswer(df.select(unix_timestamp(col("ss"))), Seq(
-      Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
-    checkAnswer(df.select(unix_timestamp(col("d"), fmt)), Seq(
-      Row(secs(date1.getTime)), Row(secs(date2.getTime))))
-    checkAnswer(df.select(unix_timestamp(col("s"), fmt)), Seq(
-      Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
-    checkAnswer(df.selectExpr("unix_timestamp(ts)"), Seq(
-      Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
-    checkAnswer(df.selectExpr("unix_timestamp(ss)"), Seq(
-      Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
-    checkAnswer(df.selectExpr(s"unix_timestamp(d, '$fmt')"), Seq(
-      Row(secs(date1.getTime)), Row(secs(date2.getTime))))
-    checkAnswer(df.selectExpr(s"unix_timestamp(s, '$fmt')"), Seq(
-      Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
-
-    val x1 = "2015-07-24 10:00:00"
-    val x2 = "2015-25-07 02:02:02"
-    val x3 = "2015-07-24 25:02:02"
-    val x4 = "2015-24-07 26:02:02"
-    val ts3 = Timestamp.valueOf("2015-07-24 02:25:02")
-    val ts4 = Timestamp.valueOf("2015-07-24 00:10:00")
-
-    val df1 = Seq(x1, x2, x3, x4).toDF("x")
-    checkAnswer(df1.select(unix_timestamp(col("x"))), Seq(
-      Row(secs(ts1.getTime)), Row(null), Row(null), Row(null)))
-    checkAnswer(df1.selectExpr("unix_timestamp(x)"), Seq(
-      Row(secs(ts1.getTime)), Row(null), Row(null), Row(null)))
-    checkAnswer(df1.select(unix_timestamp(col("x"), "yyyy-dd-MM HH:mm:ss")), Seq(
-      Row(null), Row(secs(ts2.getTime)), Row(null), Row(null)))
-    checkAnswer(df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd mm:HH:ss')"), Seq(
-      Row(secs(ts4.getTime)), Row(null), Row(secs(ts3.getTime)), Row(null)))
-
-    // invalid format
-    checkAnswer(df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd aa:HH:ss')"), Seq(
-      Row(null), Row(null), Row(null), Row(null)))
-
-    // february
-    val y1 = "2016-02-29"
-    val y2 = "2017-02-29"
-    val ts5 = Timestamp.valueOf("2016-02-29 00:00:00")
-    val df2 = Seq(y1, y2).toDF("y")
-    checkAnswer(df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")), Seq(
-      Row(secs(ts5.getTime)), Row(null)))
-
-    val now = sql("select unix_timestamp()").collect().head.getLong(0)
-    checkAnswer(
-      sql(s"select cast ($now as timestamp)"),
-      Row(new java.util.Date(TimeUnit.SECONDS.toMillis(now))))
+    Seq(false, true).foreach { legacyParser =>
+      withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) {
+        val date1 = Date.valueOf("2015-07-24")
+        val date2 = Date.valueOf("2015-07-25")
+        val ts1 = Timestamp.valueOf("2015-07-24 10:00:00.3")
+        val ts2 = Timestamp.valueOf("2015-07-25 02:02:02.2")
+        val s1 = "2015/07/24 10:00:00.5"
+        val s2 = "2015/07/25 02:02:02.6"
+        val ss1 = "2015-07-24 10:00:00"
+        val ss2 = "2015-07-25 02:02:02"
+        val fmt = "yyyy/MM/dd HH:mm:ss.S"
+        val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss")
+        checkAnswer(df.select(unix_timestamp(col("ts"))), Seq(
+          Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+        checkAnswer(df.select(unix_timestamp(col("ss"))), Seq(
+          Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+        checkAnswer(df.select(unix_timestamp(col("d"), fmt)), Seq(
+          Row(secs(date1.getTime)), Row(secs(date2.getTime))))
+        checkAnswer(df.select(unix_timestamp(col("s"), fmt)), Seq(
+          Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+        checkAnswer(df.selectExpr("unix_timestamp(ts)"), Seq(
+          Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+        checkAnswer(df.selectExpr("unix_timestamp(ss)"), Seq(
+          Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+        checkAnswer(df.selectExpr(s"unix_timestamp(d, '$fmt')"), Seq(
+          Row(secs(date1.getTime)), Row(secs(date2.getTime))))
+        checkAnswer(df.selectExpr(s"unix_timestamp(s, '$fmt')"), Seq(
+          Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+
+        val x1 = "2015-07-24 10:00:00"
+        val x2 = "2015-25-07 02:02:02"
+        val x3 = "2015-07-24 25:02:02"
+        val x4 = "2015-24-07 26:02:02"
+        val ts3 = Timestamp.valueOf("2015-07-24 02:25:02")
+        val ts4 = Timestamp.valueOf("2015-07-24 00:10:00")
+
+        val df1 = Seq(x1, x2, x3, x4).toDF("x")
+        checkAnswer(df1.select(unix_timestamp(col("x"))), Seq(
+          Row(secs(ts1.getTime)), Row(null), Row(null), Row(null)))
+        checkAnswer(df1.selectExpr("unix_timestamp(x)"), Seq(
+          Row(secs(ts1.getTime)), Row(null), Row(null), Row(null)))
+        checkAnswer(df1.select(unix_timestamp(col("x"), "yyyy-dd-MM HH:mm:ss")), Seq(
+          Row(null), Row(secs(ts2.getTime)), Row(null), Row(null)))
+        checkAnswer(df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd mm:HH:ss')"), Seq(
+          Row(secs(ts4.getTime)), Row(null), Row(secs(ts3.getTime)), Row(null)))
+
+        // invalid format
+        checkAnswer(df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd aa:HH:ss')"), Seq(
+          Row(null), Row(null), Row(null), Row(null)))
+
+        // february
+        val y1 = "2016-02-29"
+        val y2 = "2017-02-29"
+        val ts5 = Timestamp.valueOf("2016-02-29 00:00:00")
+        val df2 = Seq(y1, y2).toDF("y")
+        checkAnswer(df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")), Seq(
+          Row(secs(ts5.getTime)), Row(null)))
+
+        val now = sql("select unix_timestamp()").collect().head.getLong(0)
+        checkAnswer(
+          sql(s"select cast ($now as timestamp)"),
+          Row(new java.util.Date(TimeUnit.SECONDS.toMillis(now))))
+      }
+    }
   }
 
   test("to_unix_timestamp") {
-    val date1 = Date.valueOf("2015-07-24")
-    val date2 = Date.valueOf("2015-07-25")
-    val ts1 = Timestamp.valueOf("2015-07-24 10:00:00.3")
-    val ts2 = Timestamp.valueOf("2015-07-25 02:02:02.2")
-    val s1 = "2015/07/24 10:00:00.5"
-    val s2 = "2015/07/25 02:02:02.6"
-    val ss1 = "2015-07-24 10:00:00"
-    val ss2 = "2015-07-25 02:02:02"
-    val fmt = "yyyy/MM/dd HH:mm:ss.S"
-    val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss")
-    checkAnswer(df.selectExpr("to_unix_timestamp(ts)"), Seq(
-      Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
-    checkAnswer(df.selectExpr("to_unix_timestamp(ss)"), Seq(
-      Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
-    checkAnswer(df.selectExpr(s"to_unix_timestamp(d, '$fmt')"), Seq(
-      Row(secs(date1.getTime)), Row(secs(date2.getTime))))
-    checkAnswer(df.selectExpr(s"to_unix_timestamp(s, '$fmt')"), Seq(
-      Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
-
-    val x1 = "2015-07-24 10:00:00"
-    val x2 = "2015-25-07 02:02:02"
-    val x3 = "2015-07-24 25:02:02"
-    val x4 = "2015-24-07 26:02:02"
-    val ts3 = Timestamp.valueOf("2015-07-24 02:25:02")
-    val ts4 = Timestamp.valueOf("2015-07-24 00:10:00")
-
-    val df1 = Seq(x1, x2, x3, x4).toDF("x")
-    checkAnswer(df1.selectExpr("to_unix_timestamp(x)"), Seq(
-      Row(secs(ts1.getTime)), Row(null), Row(null), Row(null)))
-    checkAnswer(df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd mm:HH:ss')"), Seq(
-      Row(secs(ts4.getTime)), Row(null), Row(secs(ts3.getTime)), Row(null)))
-
-    // february
-    val y1 = "2016-02-29"
-    val y2 = "2017-02-29"
-    val ts5 = Timestamp.valueOf("2016-02-29 00:00:00")
-    val df2 = Seq(y1, y2).toDF("y")
-    checkAnswer(df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")), Seq(
-      Row(secs(ts5.getTime)), Row(null)))
-
-    // invalid format
-    checkAnswer(df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd bb:HH:ss')"), Seq(
-      Row(null), Row(null), Row(null), Row(null)))
+    Seq(false, true).foreach { legacyParser =>
+      withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) {
+        val date1 = Date.valueOf("2015-07-24")
+        val date2 = Date.valueOf("2015-07-25")
+        val ts1 = Timestamp.valueOf("2015-07-24 10:00:00.3")
+        val ts2 = Timestamp.valueOf("2015-07-25 02:02:02.2")
+        val s1 = "2015/07/24 10:00:00.5"
+        val s2 = "2015/07/25 02:02:02.6"
+        val ss1 = "2015-07-24 10:00:00"
+        val ss2 = "2015-07-25 02:02:02"
+        val fmt = "yyyy/MM/dd HH:mm:ss.S"
+        val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss")
+        checkAnswer(df.selectExpr("to_unix_timestamp(ts)"), Seq(
+          Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+        checkAnswer(df.selectExpr("to_unix_timestamp(ss)"), Seq(
+          Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+        checkAnswer(df.selectExpr(s"to_unix_timestamp(d, '$fmt')"), Seq(
+          Row(secs(date1.getTime)), Row(secs(date2.getTime))))
+        checkAnswer(df.selectExpr(s"to_unix_timestamp(s, '$fmt')"), Seq(
+          Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+
+        val x1 = "2015-07-24 10:00:00"
+        val x2 = "2015-25-07 02:02:02"
+        val x3 = "2015-07-24 25:02:02"
+        val x4 = "2015-24-07 26:02:02"
+        val ts3 = Timestamp.valueOf("2015-07-24 02:25:02")
+        val ts4 = Timestamp.valueOf("2015-07-24 00:10:00")
+
+        val df1 = Seq(x1, x2, x3, x4).toDF("x")
+        checkAnswer(df1.selectExpr("to_unix_timestamp(x)"), Seq(
+          Row(secs(ts1.getTime)), Row(null), Row(null), Row(null)))
+        checkAnswer(df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd mm:HH:ss')"), Seq(
+          Row(secs(ts4.getTime)), Row(null), Row(secs(ts3.getTime)), Row(null)))
+
+        // february
+        val y1 = "2016-02-29"
+        val y2 = "2017-02-29"
+        val ts5 = Timestamp.valueOf("2016-02-29 00:00:00")
+        val df2 = Seq(y1, y2).toDF("y")
+        checkAnswer(df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")), Seq(
+          Row(secs(ts5.getTime)), Row(null)))
+
+        // invalid format
+        checkAnswer(df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd bb:HH:ss')"), Seq(
+          Row(null), Row(null), Row(null), Row(null)))
+      }
+    }
   }
 
 
   test("to_timestamp") {
-    val date1 = Date.valueOf("2015-07-24")
-    val date2 = Date.valueOf("2015-07-25")
-    val ts_date1 = Timestamp.valueOf("2015-07-24 00:00:00")
-    val ts_date2 = Timestamp.valueOf("2015-07-25 00:00:00")
-    val ts1 = Timestamp.valueOf("2015-07-24 10:00:00")
-    val ts2 = Timestamp.valueOf("2015-07-25 02:02:02")
-    val s1 = "2015/07/24 10:00:00.5"
-    val s2 = "2015/07/25 02:02:02.6"
-    val ts1m = Timestamp.valueOf("2015-07-24 10:00:00.5")
-    val ts2m = Timestamp.valueOf("2015-07-25 02:02:02.6")
-    val ss1 = "2015-07-24 10:00:00"
-    val ss2 = "2015-07-25 02:02:02"
-    val fmt = "yyyy/MM/dd HH:mm:ss.S"
-    val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss")
-
-    checkAnswer(df.select(to_timestamp(col("ss"))),
-      df.select(unix_timestamp(col("ss")).cast("timestamp")))
-    checkAnswer(df.select(to_timestamp(col("ss"))), Seq(
-      Row(ts1), Row(ts2)))
-    checkAnswer(df.select(to_timestamp(col("s"), fmt)), Seq(
-      Row(ts1m), Row(ts2m)))
-    checkAnswer(df.select(to_timestamp(col("ts"), fmt)), Seq(
-      Row(ts1), Row(ts2)))
-    checkAnswer(df.select(to_timestamp(col("d"), "yyyy-MM-dd")), Seq(
-      Row(ts_date1), Row(ts_date2)))
+    Seq(false, true).foreach { legacyParser =>
+      withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) {
+        val date1 = Date.valueOf("2015-07-24")
+        val date2 = Date.valueOf("2015-07-25")
+        val ts_date1 = Timestamp.valueOf("2015-07-24 00:00:00")
+        val ts_date2 = Timestamp.valueOf("2015-07-25 00:00:00")
+        val ts1 = Timestamp.valueOf("2015-07-24 10:00:00")
+        val ts2 = Timestamp.valueOf("2015-07-25 02:02:02")
+        val s1 = "2015/07/24 10:00:00.5"
+        val s2 = "2015/07/25 02:02:02.6"
+        val ts1m = Timestamp.valueOf("2015-07-24 10:00:00.5")
+        val ts2m = Timestamp.valueOf("2015-07-25 02:02:02.6")
+        val ss1 = "2015-07-24 10:00:00"
+        val ss2 = "2015-07-25 02:02:02"
+        val fmt = "yyyy/MM/dd HH:mm:ss.S"
+        val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss")
+
+        checkAnswer(df.select(to_timestamp(col("ss"))),
+          df.select(unix_timestamp(col("ss")).cast("timestamp")))
+        checkAnswer(df.select(to_timestamp(col("ss"))), Seq(
+          Row(ts1), Row(ts2)))
+        if (legacyParser) {
 
 Review comment:
   I had to handle legacy mode specially here because of the behavior change of `to_timestamp`.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message