spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From yh...@apache.org
Subject spark git commit: [SPARK-13749][SQL][FOLLOW-UP] Faster pivot implementation for many distinct values with two phase aggregation
Date Tue, 03 May 2016 05:47:57 GMT
Repository: spark
Updated Branches:
  refs/heads/master bb9ab56b9 -> d8f528ceb


[SPARK-13749][SQL][FOLLOW-UP] Faster pivot implementation for many distinct values with two
phase aggregation

## What changes were proposed in this pull request?

This is a follow-up PR for #11583. It changes three `lazy val`s into plain `val`s and adds
unit test coverage.

## How was this patch tested?

Existing unit tests and additional unit tests.

Author: Andrew Ray <ray.andrew@gmail.com>

Closes #12861 from aray/fast-pivot-follow-up.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d8f528ce
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d8f528ce
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d8f528ce

Branch: refs/heads/master
Commit: d8f528ceb61e3c2ac7ac97cd8147dafbb625932f
Parents: bb9ab56
Author: Andrew Ray <ray.andrew@gmail.com>
Authored: Mon May 2 22:47:32 2016 -0700
Committer: Yin Huai <yhuai@databricks.com>
Committed: Mon May 2 22:47:32 2016 -0700

----------------------------------------------------------------------
 .../expressions/aggregate/PivotFirst.scala         |  6 +++---
 .../org/apache/spark/sql/DataFramePivotSuite.scala | 17 +++++++++++++++++
 2 files changed, 20 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/d8f528ce/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PivotFirst.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PivotFirst.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PivotFirst.scala
index 9154e96..9ead571 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PivotFirst.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PivotFirst.scala
@@ -141,12 +141,12 @@ case class PivotFirst(
     copy(mutableAggBufferOffset = newMutableAggBufferOffset)
 
 
-  override lazy val aggBufferAttributes: Seq[AttributeReference] =
+  override val aggBufferAttributes: Seq[AttributeReference] =
     pivotIndex.toList.sortBy(_._2).map(kv => AttributeReference(kv._1.toString, valueDataType)())
 
-  override lazy val aggBufferSchema: StructType = StructType.fromAttributes(aggBufferAttributes)
+  override val aggBufferSchema: StructType = StructType.fromAttributes(aggBufferAttributes)
 
-  override lazy val inputAggBufferAttributes: Seq[AttributeReference] =
+  override val inputAggBufferAttributes: Seq[AttributeReference] =
     aggBufferAttributes.map(_.newInstance())
 }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/d8f528ce/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala
index b17284a..c6d6751 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala
@@ -180,4 +180,21 @@ class DataFramePivotSuite extends QueryTest with SharedSQLContext{
     )
   }
 
+  test("pivot with datatype not supported by PivotFirst") {
+    checkAnswer(
+      complexData.groupBy().pivot("b", Seq(true, false)).agg(max("a")),
+      Row(Seq(1, 1, 1), Seq(2, 2, 2)) :: Nil
+    )
+  }
+
+  test("pivot with datatype not supported by PivotFirst 2") {
+    checkAnswer(
+      courseSales.withColumn("e", expr("array(earnings, 7.0d)"))
+        .groupBy("year")
+        .pivot("course", Seq("dotNET", "Java"))
+        .agg(min($"e")),
+      Row(2012, Seq(5000.0, 7.0), Seq(20000.0, 7.0)) ::
+        Row(2013, Seq(48000.0, 7.0), Seq(30000.0, 7.0)) :: Nil
+    )
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message