spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From lix...@apache.org
Subject spark git commit: [SPARK-21807][SQL] Override ++ operation in ExpressionSet to reduce clone time
Date Thu, 24 Aug 2017 03:35:12 GMT
Repository: spark
Updated Branches:
  refs/heads/master 6942aeeb0 -> b8aaef49f


[SPARK-21807][SQL] Override ++ operation in ExpressionSet to reduce clone time

## What changes were proposed in this pull request?
The getAliasedConstraints  fuction in LogicalPlan.scala will clone the expression set when
an element added,
and it will take a long time. This PR add a function to add multiple elements at once to reduce
the clone time.

Before modified, the cost of getAliasedConstraints is:
100 expressions:  41 seconds
150 expressions:  466 seconds

After modified, the cost of getAliasedConstraints is:
100 expressions:  1.8 seconds
150 expressions:  6.5 seconds

The test is like this:
test("getAliasedConstraints") {
    val expressionNum = 150
    val aggExpression = (1 to expressionNum).map(i => Alias(Count(Literal(1)), s"cnt$i")())
    val aggPlan = Aggregate(Nil, aggExpression, LocalRelation())

    val beginTime = System.currentTimeMillis()
    val expressions = aggPlan.validConstraints
    println(s"validConstraints cost: ${System.currentTimeMillis() - beginTime}ms")
    // The size of Aliased expression is n * (n - 1) / 2 + n
    assert( expressions.size === expressionNum * (expressionNum - 1) / 2 + expressionNum)
  }

(Please fill in changes proposed in this fix)

## How was this patch tested?

(Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests)
(If this patch involves UI changes, please attach a screenshot; otherwise, remove this)

Run new added test.

Please review http://spark.apache.org/contributing.html before opening a pull request.

Author: 10129659 <chen.yanshan@zte.com.cn>

Closes #19022 from eatoncys/getAliasedConstraints.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b8aaef49
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b8aaef49
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b8aaef49

Branch: refs/heads/master
Commit: b8aaef49fbf02401c874b06d17cbe354f739b9e7
Parents: 6942aee
Author: 10129659 <chen.yanshan@zte.com.cn>
Authored: Wed Aug 23 20:35:08 2017 -0700
Committer: gatorsmile <gatorsmile@gmail.com>
Committed: Wed Aug 23 20:35:08 2017 -0700

----------------------------------------------------------------------
 .../spark/sql/catalyst/expressions/ExpressionSet.scala      | 8 +++++++-
 .../spark/sql/catalyst/expressions/ExpressionSetSuite.scala | 9 +++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/b8aaef49/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala
index ede0b16..305ac90 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.catalyst.expressions
 
-import scala.collection.mutable
+import scala.collection.{mutable, GenTraversableOnce}
 import scala.collection.mutable.ArrayBuffer
 
 object ExpressionSet {
@@ -67,6 +67,12 @@ class ExpressionSet protected(
     newSet
   }
 
+  override def ++(elems: GenTraversableOnce[Expression]): ExpressionSet = {
+    val newSet = new ExpressionSet(baseSet.clone(), originals.clone())
+    elems.foreach(newSet.add)
+    newSet
+  }
+
   override def -(elem: Expression): ExpressionSet = {
     val newBaseSet = baseSet.clone().filterNot(_ == elem.canonicalized)
     val newOriginals = originals.clone().filterNot(_.canonicalized == elem.canonicalized)

http://git-wip-us.apache.org/repos/asf/spark/blob/b8aaef49/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala
index d617ad5..a1000a0 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala
@@ -210,4 +210,13 @@ class ExpressionSetSuite extends SparkFunSuite {
     assert((initialSet - (aLower + 1)).size == 0)
 
   }
+
+  test("add multiple elements to set") {
+    val initialSet = ExpressionSet(aUpper + 1 :: Nil)
+    val setToAddWithSameExpression = ExpressionSet(aUpper + 1 :: aUpper + 2 :: Nil)
+    val setToAddWithOutSameExpression = ExpressionSet(aUpper + 3 :: aUpper + 4 :: Nil)
+
+    assert((initialSet ++ setToAddWithSameExpression).size == 2)
+    assert((initialSet ++ setToAddWithOutSameExpression).size == 3)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message