Date: Mon, 20 May 2019 02:26:47 +0000
From: dongjoon@apache.org
To: commits@spark.apache.org
Subject: [spark] branch master updated: [SPARK-27771][SQL] Add SQL description for grouping functions (cube, rollup, grouping and grouping_id)

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 2431ab0  [SPARK-27771][SQL] Add SQL description for grouping functions (cube, rollup, grouping and grouping_id)
2431ab0 is described below

commit 2431ab0999dbb322dcefeb9b1671d935945dc29a
Author: HyukjinKwon
AuthorDate: Sun May 19 19:26:20 2019 -0700

    [SPARK-27771][SQL] Add SQL description for grouping functions (cube, rollup, grouping and grouping_id)

    ## What changes were proposed in this pull request?

    These functions have been available since 2.0 (see SPARK-12541 and SPARK-12706). I referred to the existing docs and to the examples in other API docs.

    ## How was this patch tested?

    Manually built the documentation, ran the examples, and ran `DESCRIBE FUNCTION EXTENDED`.

    Closes #24642 from HyukjinKwon/SPARK-27771.

    Authored-by: HyukjinKwon
    Signed-off-by: Dongjoon Hyun
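As an illustration of the `DESCRIBE FUNCTION EXTENDED` check mentioned under "How was this patch tested?", here is a minimal sketch (assuming only the `spark` SparkSession that spark-shell provides; the sketch itself is not part of the patch) that prints the new usage, examples and since fields for each of the four functions:

```scala
// Print the extended description for each function documented by this patch.
// Assumes a SparkSession named `spark`, e.g. the one created by spark-shell.
Seq("cube", "rollup", "grouping", "grouping_id").foreach { fn =>
  spark.sql(s"DESCRIBE FUNCTION EXTENDED $fn").show(truncate = false)
}
```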
---
 .../spark/sql/catalyst/expressions/grouping.scala | 74 ++++++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala
index 3be761c..b8ff455 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala
@@ -38,14 +38,65 @@ trait GroupingSet extends Expression with CodegenFallback {
 
   override def eval(input: InternalRow): Any = throw new UnsupportedOperationException
 }
 
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+    _FUNC_([col1[, col2 ..]]) - create a multi-dimensional cube using the specified columns
+      so that we can run aggregation on them.
+  """,
+  examples = """
+    Examples:
+      > SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY _FUNC_(name, age);
+       NULL   2     1
+       NULL   NULL  2
+       Alice  2     1
+       Bob    5     1
+       NULL   5     1
+       Bob    NULL  1
+       Alice  NULL  1
+  """,
+  since = "2.0.0")
+// scalastyle:on line.size.limit
 case class Cube(groupByExprs: Seq[Expression]) extends GroupingSet {}
 
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+    _FUNC_([col1[, col2 ..]]) - create a multi-dimensional rollup using the specified columns
+      so that we can run aggregation on them.
+  """,
+  examples = """
+    Examples:
+      > SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY _FUNC_(name, age);
+       NULL   NULL  2
+       Alice  2     1
+       Bob    5     1
+       Bob    NULL  1
+       Alice  NULL  1
+  """,
+  since = "2.0.0")
+// scalastyle:on line.size.limit
 case class Rollup(groupByExprs: Seq[Expression]) extends GroupingSet {}
 
 /**
  * Indicates whether a specified column expression in a GROUP BY list is aggregated or not.
  * GROUPING returns 1 for aggregated or 0 for not aggregated in the result set.
  */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+    _FUNC_(col) - indicates whether a specified column in a GROUP BY is aggregated or
+      not, returns 1 for aggregated or 0 for not aggregated in the result set.
+  """,
+  examples = """
+    Examples:
+      > SELECT name, _FUNC_(name), sum(age) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name);
+       Alice  0  2
+       NULL   1  7
+       Bob    0  5
+  """,
+  since = "2.0.0")
+// scalastyle:on line.size.limit
 case class Grouping(child: Expression) extends Expression with Unevaluable {
   override def references: AttributeSet = AttributeSet(VirtualColumn.groupingIdAttribute :: Nil)
   override def children: Seq[Expression] = child :: Nil
@@ -58,6 +109,29 @@ case class Grouping(child: Expression) extends Expression with Unevaluable {
  *
  * If groupByExprs is empty, it means all grouping expressions in GroupingSets.
  */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+    _FUNC_([col1[, col2 ..]]) - returns the level of grouping, equals to
+      `(grouping(c1) << (n-1)) + (grouping(c2) << (n-2)) + ... + grouping(cn)`
+  """,
+  examples = """
+    Examples:
+      > SELECT name, _FUNC_(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height);
+       NULL   2  2  165.0
+       Alice  0  2  165.0
+       NULL   2  5  180.0
+       NULL   3  7  172.5
+       Bob    0  5  180.0
+       Bob    1  5  180.0
+       Alice  1  2  165.0
+  """,
+  note = """
+    Input columns should match with grouping columns exactly, or empty (means all the grouping
+    columns).
+  """,
+  since = "2.0.0")
+// scalastyle:on line.size.limit
 case class GroupingID(groupByExprs: Seq[Expression]) extends Expression with Unevaluable {
   override def references: AttributeSet = AttributeSet(VirtualColumn.groupingIdAttribute :: Nil)
   override def children: Seq[Expression] = groupByExprs
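As a worked illustration of the `grouping_id` formula documented above, the following sketch (assuming only the `spark` SparkSession that spark-shell provides, and reusing the inline `people` data from the new examples) shows how `grouping_id()` combines the per-column `grouping()` bits when there are n = 2 grouping columns:

```scala
// Relate grouping_id() to the per-column grouping() bits for GROUP BY cube(name, height).
// Assumes a SparkSession named `spark`, e.g. the one created by spark-shell.
spark.sql("""
  SELECT name, height,
         grouping(name)   AS g_name,
         grouping(height) AS g_height,
         grouping_id()    AS gid
  FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height)
  GROUP BY cube(name, height)
  ORDER BY gid
""").show()
// Every row satisfies gid == (g_name << 1) + g_height; the grand-total row, where both
// grouping columns are aggregated away, has gid = 3, matching the `NULL  3  7  172.5`
// row in the grouping_id example in the diff above.
```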