spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From lix...@apache.org
Subject spark git commit: [SPARK-17237][SPARK-17458][SQL][BACKPORT-2.0] Preserve aliases that are given for pivot aggregations
Date Sun, 15 Jan 2017 07:39:40 GMT
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 08385b765 -> ee4e8faff


[SPARK-17237][SPARK-17458][SQL][BACKPORT-2.0] Preserve aliases that are given for pivot aggregations

## What changes were proposed in this pull request?
This pr is to preserve aliases that are given for pivot aggregations to solve the issue reported
in `SPARK-17237`. This pivoting adds backticks (e.g. 3_count(\`c\`)) in column names and,
in some cases,
thes causes analysis exceptions  like;
```
scala> val df = Seq((2, 3, 4), (3, 4, 5)).toDF("a", "x", "y")
scala> df.groupBy("a").pivot("x").agg(count("y"), avg("y")).na.fill(0)
org.apache.spark.sql.AnalysisException: syntax error in attribute name: `3_count(`y`)`;
  at org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute$.e$1(unresolved.scala:134)
  at org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute$.parseAttributeName(unresolved.scala:144)
...
```
So, this pr also removes these backticks from column names.

## How was this patch tested?
Added a test in `DataFrameAggregateSuite`.

Author: Takeshi YAMAMURO <linguin.m.s@gmail.com>

Closes #16565 from maropu/SPARK-17237-3.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ee4e8faf
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ee4e8faf
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ee4e8faf

Branch: refs/heads/branch-2.0
Commit: ee4e8faff487220cc8f7157e2236847f91b310e1
Parents: 08385b7
Author: Takeshi YAMAMURO <linguin.m.s@gmail.com>
Authored: Sat Jan 14 23:39:31 2017 -0800
Committer: gatorsmile <gatorsmile@gmail.com>
Committed: Sat Jan 14 23:39:31 2017 -0800

----------------------------------------------------------------------
 .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 10 +++++++++-
 .../org/apache/spark/sql/DataFrameAggregateSuite.scala    |  8 ++++++++
 .../scala/org/apache/spark/sql/DataFramePivotSuite.scala  | 10 ++++++++++
 3 files changed, 27 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/ee4e8faf/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 9040ced..9e5ea41 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -384,7 +384,15 @@ class Analyzer(
       case Pivot(groupByExprs, pivotColumn, pivotValues, aggregates, child) =>
         val singleAgg = aggregates.size == 1
         def outputName(value: Literal, aggregate: Expression): String = {
-          if (singleAgg) value.toString else value + "_" + aggregate.sql
+          if (singleAgg) {
+            value.toString
+          } else {
+            val suffix = aggregate match {
+              case n: NamedExpression => n.name
+              case _ => toPrettySQL(aggregate)
+            }
+            value + "_" + suffix
+          }
         }
         if (aggregates.forall(a => PivotFirst.supportsDataType(a.dataType))) {
           // Since evaluating |pivotValues| if statements for each input row can get slow
this is an

http://git-wip-us.apache.org/repos/asf/spark/blob/ee4e8faf/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
index 94da29c..51eef04 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
@@ -513,4 +513,12 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext
{
       limit2Df.groupBy("id").count().select($"id"),
       limit2Df.select($"id"))
   }
+
+  test("SPARK-17237 remove backticks in a pivot result schema") {
+    val df = Seq((2, 3, 4), (3, 4, 5)).toDF("a", "x", "y")
+    checkAnswer(
+      df.groupBy("a").pivot("x").agg(count("y"), avg("y")).na.fill(0),
+      Seq(Row(3, 0, 0.0, 1, 5.0), Row(2, 1, 4.0, 0, 0.0))
+    )
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/ee4e8faf/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala
index 41d3525..51ffe34 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala
@@ -198,6 +198,16 @@ class DataFramePivotSuite extends QueryTest with SharedSQLContext{
     )
   }
 
+  test("pivot preserves aliases if given") {
+    assertResult(
+      Array("year", "dotNET_foo", "dotNET_avg(earnings)", "Java_foo", "Java_avg(earnings)")
+    )(
+      courseSales.groupBy($"year")
+        .pivot("course", Seq("dotNET", "Java"))
+        .agg(sum($"earnings").as("foo"), avg($"earnings")).columns
+    )
+  }
+
   test("pivot with column definition in groupby") {
     checkAnswer(
       courseSales.groupBy(substring(col("course"), 0, 1).as("foo"))


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message