spark-commits mailing list archives

From dongj...@apache.org
Subject [spark] branch master updated: [SPARK-27771][SQL] Add SQL description for grouping functions (cube, rollup, grouping and grouping_id)
Date Mon, 20 May 2019 02:26:47 GMT
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 2431ab0  [SPARK-27771][SQL] Add SQL description for grouping functions (cube, rollup, grouping and grouping_id)
2431ab0 is described below

commit 2431ab0999dbb322dcefeb9b1671d935945dc29a
Author: HyukjinKwon <gurwls223@apache.org>
AuthorDate: Sun May 19 19:26:20 2019 -0700

    [SPARK-27771][SQL] Add SQL description for grouping functions (cube, rollup, grouping and grouping_id)
    
    ## What changes were proposed in this pull request?
    
    Both appear to have been added in 2.0 (see SPARK-12541 and SPARK-12706). I referred to the existing docs and examples in other API docs.
    
    ## How was this patch tested?
    
    Manually built the documentation and verified the examples by running `DESCRIBE FUNCTION EXTENDED`.
    
    Closes #24642 from HyukjinKwon/SPARK-27771.
    
    Authored-by: HyukjinKwon <gurwls223@apache.org>
    Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
---
 .../spark/sql/catalyst/expressions/grouping.scala  | 74 ++++++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala
index 3be761c..b8ff455 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala
@@ -38,14 +38,65 @@ trait GroupingSet extends Expression with CodegenFallback {
   override def eval(input: InternalRow): Any = throw new UnsupportedOperationException
 }
 
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+    _FUNC_([col1[, col2 ..]]) - create a multi-dimensional cube using the specified columns
+      so that we can run aggregation on them.
+  """,
+  examples = """
+    Examples:
+      > SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY _FUNC_(name, age);
+        NULL    2       1
+        NULL    NULL    2
+        Alice   2       1
+        Bob     5       1
+        NULL    5       1
+        Bob     NULL    1
+        Alice   NULL    1
+  """,
+  since = "2.0.0")
+// scalastyle:on line.size.limit
 case class Cube(groupByExprs: Seq[Expression]) extends GroupingSet {}
 
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+    _FUNC_([col1[, col2 ..]]) - create a multi-dimensional rollup using the specified columns
+      so that we can run aggregation on them.
+  """,
+  examples = """
+    Examples:
+      > SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY _FUNC_(name, age);
+        NULL    NULL    2
+        Alice   2       1
+        Bob     5       1
+        Bob     NULL    1
+        Alice   NULL    1
+  """,
+  since = "2.0.0")
+// scalastyle:on line.size.limit
 case class Rollup(groupByExprs: Seq[Expression]) extends GroupingSet {}
 
 /**
  * Indicates whether a specified column expression in a GROUP BY list is aggregated or not.
  * GROUPING returns 1 for aggregated or 0 for not aggregated in the result set.
  */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+    _FUNC_(col) - indicates whether a specified column in a GROUP BY is aggregated or
+      not; returns 1 for aggregated or 0 for not aggregated in the result set.
+  """,
+  examples = """
+    Examples:
+      > SELECT name, _FUNC_(name), sum(age) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name);
+        Alice   0       2
+        NULL    1       7
+        Bob     0       5
+  """,
+  since = "2.0.0")
+// scalastyle:on line.size.limit
 case class Grouping(child: Expression) extends Expression with Unevaluable {
   override def references: AttributeSet = AttributeSet(VirtualColumn.groupingIdAttribute :: Nil)
   override def children: Seq[Expression] = child :: Nil
@@ -58,6 +109,29 @@ case class Grouping(child: Expression) extends Expression with Unevaluable {
  *
  * If groupByExprs is empty, it means all grouping expressions in GroupingSets.
  */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+    _FUNC_([col1[, col2 ..]]) - returns the level of grouping, equal to
+      `(grouping(c1) << (n-1)) + (grouping(c2) << (n-2)) + ... + grouping(cn)`
+  """,
+  examples = """
+    Examples:
+      > SELECT name, _FUNC_(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height);
+        NULL    2       2       165.0
+        Alice   0       2       165.0
+        NULL    2       5       180.0
+        NULL    3       7       172.5
+        Bob     0       5       180.0
+        Bob     1       5       180.0
+        Alice   1       2       165.0
+  """,
+  note = """
+    Input columns should match the grouping columns exactly, or be empty (which means all the
+    grouping columns).
+  """,
+  since = "2.0.0")
+// scalastyle:on line.size.limit
 case class GroupingID(groupByExprs: Seq[Expression]) extends Expression with Unevaluable {
   override def references: AttributeSet = AttributeSet(VirtualColumn.groupingIdAttribute :: Nil)
   override def children: Seq[Expression] = groupByExprs

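As a quick sanity check of the grouping_id() formula documented in the patch above, here is a minimal sketch for GROUP BY cube(name, height), i.e. n = 2 grouping columns; the expected values mirror the example output in the new usage string:

// Documented bit arithmetic: grouping_id() == (grouping(name) << 1) + grouping(height)
val cases = Seq(
  // (grouping(name), grouping(height)) -> expected grouping_id()
  (0, 0) -> 0, // grouped on both name and height
  (0, 1) -> 1, // height aggregated away (e.g. the "Bob 1 5 180.0" row)
  (1, 0) -> 2, // name aggregated away (e.g. the "NULL 2 2 165.0" row)
  (1, 1) -> 3  // grand-total row ("NULL 3 7 172.5")
)
cases.foreach { case ((gName, gHeight), expected) =>
  assert(((gName << 1) + gHeight) == expected)
}
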

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org

