spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From r...@apache.org
Subject spark git commit: [SPARK-14321][SQL] Reduce date format cost and string-to-date cost in date functions
Date Thu, 09 Jun 2016 23:37:32 GMT
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 0408793aa -> b42e3d886


[SPARK-14321][SQL] Reduce date format cost and string-to-date cost in date functions

## What changes were proposed in this pull request?
The current implementations of `UnixTime` and `FromUnixTime` do not cache their parser/formatter
as much as they could. This PR resolved this issue.

This PR is a take over from https://github.com/apache/spark/pull/13522 and further optimizes
the re-use of the parser/formatter. It also fixes the improves handling (catching the actual
exception instead of `Throwable`). All credits for this work should go to rajeshbalamohan.

This PR closes https://github.com/apache/spark/pull/13522

## How was this patch tested?
Current tests.

Author: Herman van Hovell <hvanhovell@databricks.com>
Author: Rajesh Balamohan <rbalamohan@apache.org>

Closes #13581 from hvanhovell/SPARK-14321.

(cherry picked from commit b0768538e56e5bbda7aaabbe2a0197e30ba5f993)
Signed-off-by: Reynold Xin <rxin@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b42e3d88
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b42e3d88
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b42e3d88

Branch: refs/heads/branch-2.0
Commit: b42e3d886f1688feb6e9ca85a60ce5e6295e8489
Parents: 0408793
Author: Herman van Hovell <hvanhovell@databricks.com>
Authored: Thu Jun 9 16:37:18 2016 -0700
Committer: Reynold Xin <rxin@databricks.com>
Committed: Thu Jun 9 16:37:29 2016 -0700

----------------------------------------------------------------------
 .../expressions/datetimeExpressions.scala       | 48 ++++++++++----------
 1 file changed, 24 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/b42e3d88/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 69c32f4..773431d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -399,6 +399,8 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes
{
   override def nullable: Boolean = true
 
   private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String]
+  private lazy val formatter: SimpleDateFormat =
+    Try(new SimpleDateFormat(constFormat.toString)).getOrElse(null)
 
   override def eval(input: InternalRow): Any = {
     val t = left.eval(input)
@@ -411,11 +413,11 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes
{
         case TimestampType =>
           t.asInstanceOf[Long] / 1000000L
         case StringType if right.foldable =>
-          if (constFormat != null) {
-            Try(new SimpleDateFormat(constFormat.toString).parse(
-              t.asInstanceOf[UTF8String].toString).getTime / 1000L).getOrElse(null)
-          } else {
+          if (constFormat == null || formatter == null) {
             null
+          } else {
+            Try(formatter.parse(
+              t.asInstanceOf[UTF8String].toString).getTime / 1000L).getOrElse(null)
           }
         case StringType =>
           val f = right.eval(input)
@@ -434,13 +436,10 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes
{
     left.dataType match {
       case StringType if right.foldable =>
         val sdf = classOf[SimpleDateFormat].getName
-        val fString = if (constFormat == null) null else constFormat.toString
-        val formatter = ctx.freshName("formatter")
-        if (fString == null) {
-          ev.copy(code = s"""
-            boolean ${ev.isNull} = true;
-            ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};""")
+        if (formatter == null) {
+          ExprCode("", "true", ctx.defaultValue(dataType))
         } else {
+          val formatterName = ctx.addReferenceObj("formatter", formatter, sdf)
           val eval1 = left.genCode(ctx)
           ev.copy(code = s"""
             ${eval1.code}
@@ -448,10 +447,8 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes
{
             ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
             if (!${ev.isNull}) {
               try {
-                $sdf $formatter = new $sdf("$fString");
-                ${ev.value} =
-                  $formatter.parse(${eval1.value}.toString()).getTime() / 1000L;
-              } catch (java.lang.Throwable e) {
+                ${ev.value} = $formatterName.parse(${eval1.value}.toString()).getTime() /
1000L;
+              } catch (java.text.ParseException e) {
                 ${ev.isNull} = true;
               }
             }""")
@@ -463,7 +460,9 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes
{
             try {
               ${ev.value} =
                 (new $sdf($format.toString())).parse($string.toString()).getTime() / 1000L;
-            } catch (java.lang.Throwable e) {
+            } catch (java.lang.IllegalArgumentException e) {
+              ${ev.isNull} = true;
+            } catch (java.text.ParseException e) {
               ${ev.isNull} = true;
             }
           """
@@ -520,6 +519,8 @@ case class FromUnixTime(sec: Expression, format: Expression)
   override def inputTypes: Seq[AbstractDataType] = Seq(LongType, StringType)
 
   private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String]
+  private lazy val formatter: SimpleDateFormat =
+    Try(new SimpleDateFormat(constFormat.toString)).getOrElse(null)
 
   override def eval(input: InternalRow): Any = {
     val time = left.eval(input)
@@ -527,10 +528,10 @@ case class FromUnixTime(sec: Expression, format: Expression)
       null
     } else {
       if (format.foldable) {
-        if (constFormat == null) {
+        if (constFormat == null || formatter == null) {
           null
         } else {
-          Try(UTF8String.fromString(new SimpleDateFormat(constFormat.toString).format(
+          Try(UTF8String.fromString(formatter.format(
             new java.util.Date(time.asInstanceOf[Long] * 1000L)))).getOrElse(null)
         }
       } else {
@@ -549,11 +550,10 @@ case class FromUnixTime(sec: Expression, format: Expression)
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
     val sdf = classOf[SimpleDateFormat].getName
     if (format.foldable) {
-      if (constFormat == null) {
-        ev.copy(code = s"""
-          boolean ${ev.isNull} = true;
-          ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};""")
+      if (formatter == null) {
+        ExprCode("", "true", "(UTF8String) null")
       } else {
+        val formatterName = ctx.addReferenceObj("formatter", formatter, sdf)
         val t = left.genCode(ctx)
         ev.copy(code = s"""
           ${t.code}
@@ -561,9 +561,9 @@ case class FromUnixTime(sec: Expression, format: Expression)
           ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
           if (!${ev.isNull}) {
             try {
-              ${ev.value} = UTF8String.fromString(new $sdf("${constFormat.toString}").format(
+              ${ev.value} = UTF8String.fromString($formatterName.format(
                 new java.util.Date(${t.value} * 1000L)));
-            } catch (java.lang.Throwable e) {
+            } catch (java.lang.IllegalArgumentException e) {
               ${ev.isNull} = true;
             }
           }""")
@@ -574,7 +574,7 @@ case class FromUnixTime(sec: Expression, format: Expression)
         try {
           ${ev.value} = UTF8String.fromString((new $sdf($f.toString())).format(
             new java.util.Date($seconds * 1000L)));
-        } catch (java.lang.Throwable e) {
+        } catch (java.lang.IllegalArgumentException e) {
           ${ev.isNull} = true;
         }""".stripMargin
       })


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message