spark-commits mailing list archives

From r...@apache.org
Subject spark git commit: [SPARK-11864][SQL] Improve performance of max/min
Date Fri, 20 Nov 2015 01:14:16 GMT
Repository: spark
Updated Branches:
  refs/heads/master b2cecb80e -> ee2140774


[SPARK-11864][SQL] Improve performance of max/min

This PR makes the following optimizations:

1) `Greatest`/`Least` already do the null check, so the surrounding `If` and `IsNull` are not necessary.

2) In `Greatest`/`Least`, the result should be initialized from the first child, which removes one generated block.

3) For primitive types, the generated greater-than expression is too complicated (`(a > b ? 1 : (a < b) ? -1 : 0) > 0`); it should be as simple as `a > b`.

Combined, these optimizations can improve the performance of the `ss_max` query by 30%.
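
As a quick sanity check (REPL-style sketch, assuming a Scala shell with catalyst on the classpath; not part of this patch), `Greatest`/`Least` already treat null children the way the removed `If`/`IsNull` wrapper did, returning null only when every input is null:

    import org.apache.spark.sql.catalyst.expressions.{Greatest, Least, Literal}
    import org.apache.spark.sql.types.IntegerType

    val nullInt = Literal.create(null, IntegerType)
    // Null children are skipped, so the non-null value wins.
    Greatest(Seq(nullInt, Literal(3))).eval()   // 3
    Least(Seq(Literal(5), nullInt)).eval()      // 5
    // The result is null only when all children are null.
    Greatest(Seq(nullInt, nullInt)).eval()      // null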

Author: Davies Liu <davies@databricks.com>

Closes #9846 from davies/improve_max.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ee214077
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ee214077
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ee214077

Branch: refs/heads/master
Commit: ee21407747fb00db2f26d1119446ccbb20c19232
Parents: b2cecb8
Author: Davies Liu <davies@databricks.com>
Authored: Thu Nov 19 17:14:10 2015 -0800
Committer: Reynold Xin <rxin@databricks.com>
Committed: Thu Nov 19 17:14:10 2015 -0800

----------------------------------------------------------------------
 .../catalyst/expressions/aggregate/Max.scala    |  5 ++-
 .../catalyst/expressions/aggregate/Min.scala    |  5 ++-
 .../expressions/codegen/CodeGenerator.scala     | 12 +++++++
 .../expressions/conditionalExpressions.scala    | 38 +++++++++++---------
 .../catalyst/expressions/nullExpressions.scala  | 10 ++++--
 5 files changed, 45 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/ee214077/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala
index 61cae44..9060031 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala
@@ -46,13 +46,12 @@ case class Max(child: Expression) extends DeclarativeAggregate {
   )
 
   override lazy val updateExpressions: Seq[Expression] = Seq(
-    /* max = */ If(IsNull(child), max, If(IsNull(max), child, Greatest(Seq(max, child))))
+    /* max = */ Greatest(Seq(max, child))
   )
 
   override lazy val mergeExpressions: Seq[Expression] = {
-    val greatest = Greatest(Seq(max.left, max.right))
     Seq(
-      /* max = */ If(IsNull(max.right), max.left, If(IsNull(max.left), max.right, greatest))
+      /* max = */ Greatest(Seq(max.left, max.right))
     )
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/ee214077/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala
index 242456d..39f7afb 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala
@@ -47,13 +47,12 @@ case class Min(child: Expression) extends DeclarativeAggregate {
   )
 
   override lazy val updateExpressions: Seq[Expression] = Seq(
-    /* min = */ If(IsNull(child), min, If(IsNull(min), child, Least(Seq(min, child))))
+    /* min = */ Least(Seq(min, child))
   )
 
   override lazy val mergeExpressions: Seq[Expression] = {
-    val least = Least(Seq(min.left, min.right))
     Seq(
-      /* min = */ If(IsNull(min.right), min.left, If(IsNull(min.left), min.right, least))
+      /* min = */ Least(Seq(min.left, min.right))
     )
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/ee214077/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 1718cfb..1b7260c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -330,6 +330,18 @@ class CodeGenContext {
   }
 
   /**
+    * Generates code for greater of two expressions.
+    *
+    * @param dataType data type of the expressions
+    * @param c1 name of the variable of expression 1's output
+    * @param c2 name of the variable of expression 2's output
+    */
+  def genGreater(dataType: DataType, c1: String, c2: String): String = javaType(dataType) match {
+    case JAVA_BYTE | JAVA_SHORT | JAVA_INT | JAVA_LONG => s"$c1 > $c2"
+    case _ => s"(${genComp(dataType, c1, c2)}) > 0"
+  }
+
+  /**
    * List of java data types that have special accessors and setters in [[InternalRow]].
    */
   val primitiveTypes =

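A quick illustration of the new helper (REPL-style sketch, assuming catalyst on the classpath; the names `a` and `b` are placeholder variable names): integral primitive types get a plain `>` comparison, while everything else still goes through `genComp` and tests the result against 0:

    import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenContext
    import org.apache.spark.sql.types.{DoubleType, IntegerType}

    val ctx = new CodeGenContext
    // byte/short/int/long compare directly.
    ctx.genGreater(IntegerType, "a", "b")   // "a > b"
    // Other types fall back to the three-way genComp result tested against 0.
    ctx.genGreater(DoubleType, "a", "b")    // "(" + <genComp output> + ") > 0"
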
http://git-wip-us.apache.org/repos/asf/spark/blob/ee214077/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
index 0d4af43..694a2a7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
@@ -348,19 +348,22 @@ case class Least(children: Seq[Expression]) extends Expression {
 
   override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
     val evalChildren = children.map(_.gen(ctx))
-    def updateEval(i: Int): String =
+    val first = evalChildren(0)
+    val rest = evalChildren.drop(1)
+    def updateEval(eval: GeneratedExpressionCode): String =
       s"""
-        if (!${evalChildren(i).isNull} && (${ev.isNull} ||
-          ${ctx.genComp(dataType, evalChildren(i).value, ev.value)} < 0)) {
+        ${eval.code}
+        if (!${eval.isNull} && (${ev.isNull} ||
+          ${ctx.genGreater(dataType, ev.value, eval.value)})) {
           ${ev.isNull} = false;
-          ${ev.value} = ${evalChildren(i).value};
+          ${ev.value} = ${eval.value};
         }
       """
     s"""
-      ${evalChildren.map(_.code).mkString("\n")}
-      boolean ${ev.isNull} = true;
-      ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
-      ${children.indices.map(updateEval).mkString("\n")}
+      ${first.code}
+      boolean ${ev.isNull} = ${first.isNull};
+      ${ctx.javaType(dataType)} ${ev.value} = ${first.value};
+      ${rest.map(updateEval).mkString("\n")}
     """
   }
 }
@@ -403,19 +406,22 @@ case class Greatest(children: Seq[Expression]) extends Expression {
 
   override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
     val evalChildren = children.map(_.gen(ctx))
-    def updateEval(i: Int): String =
+    val first = evalChildren(0)
+    val rest = evalChildren.drop(1)
+    def updateEval(eval: GeneratedExpressionCode): String =
       s"""
-        if (!${evalChildren(i).isNull} && (${ev.isNull} ||
-          ${ctx.genComp(dataType, evalChildren(i).value, ev.value)} > 0)) {
+        ${eval.code}
+        if (!${eval.isNull} && (${ev.isNull} ||
+          ${ctx.genGreater(dataType, eval.value, ev.value)})) {
           ${ev.isNull} = false;
-          ${ev.value} = ${evalChildren(i).value};
+          ${ev.value} = ${eval.value};
         }
       """
     s"""
-      ${evalChildren.map(_.code).mkString("\n")}
-      boolean ${ev.isNull} = true;
-      ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
-      ${children.indices.map(updateEval).mkString("\n")}
+      ${first.code}
+      boolean ${ev.isNull} = ${first.isNull};
+      ${ctx.javaType(dataType)} ${ev.value} = ${first.value};
+      ${rest.map(updateEval).mkString("\n")}
     """
   }
 }

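To make "initialize the result using the first child" concrete, here is a plain-Scala sketch that mirrors the old and new codegen templates above (illustrative only, not the actual generated Java): the old template started from a null result plus a type default and ran the guarded update for every child, while the new one seeds the result from the first child and only updates for the rest:

    // Old shape: result starts as "null" with a type default; every child runs the guarded update.
    def greatestOld(values: Seq[Option[Int]]): Option[Int] = {
      var isNull = true
      var value = 0                          // ctx.defaultValue(dataType)
      for (v <- values; x <- v) {            // null children are skipped
        if (isNull || x > value) { isNull = false; value = x }
      }
      if (isNull) None else Some(value)
    }

    // New shape: result is seeded from the first child, so one guarded block disappears.
    def greatestNew(values: Seq[Option[Int]]): Option[Int] = {
      var isNull = values.head.isEmpty
      var value = values.head.getOrElse(0)
      for (v <- values.tail; x <- v) {
        if (isNull || x > value) { isNull = false; value = x }
      }
      if (isNull) None else Some(value)
    }
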
http://git-wip-us.apache.org/repos/asf/spark/blob/ee214077/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala
index 94deafb..df4747d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala
@@ -62,11 +62,15 @@ case class Coalesce(children: Seq[Expression]) extends Expression {
   }
 
   override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
+    val first = children(0)
+    val rest = children.drop(1)
+    val firstEval = first.gen(ctx)
     s"""
-      boolean ${ev.isNull} = true;
-      ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
+      ${firstEval.code}
+      boolean ${ev.isNull} = ${firstEval.isNull};
+      ${ctx.javaType(dataType)} ${ev.value} = ${firstEval.value};
     """ +
-    children.map { e =>
+      rest.map { e =>
       val eval = e.gen(ctx)
       s"""
         if (${ev.isNull}) {


