From: jcamacho@apache.org
To: commits@hive.apache.org
Subject: hive git commit: HIVE-15996: Implement multiargument GROUPING function (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Date: Tue, 4 Apr 2017 17:54:58 +0000 (UTC)

Repository: hive
Updated Branches:
  refs/heads/master 90f2a047a -> 1a1e8357b


HIVE-15996: Implement multiargument GROUPING function (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1a1e8357
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1a1e8357
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1a1e8357

Branch: refs/heads/master
Commit: 1a1e8357bcb09ab7b775f26b83f00d6f687bbc23
Parents: 90f2a04
Author: Jesus Camacho Rodriguez
Authored: Mon Feb 27 09:24:06 2017 +0000
Committer: Jesus Camacho Rodriguez
Committed: Tue Apr 4 19:54:16 2017 +0200

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  80 ++--
 .../hive/ql/udf/generic/GenericUDFGrouping.java |  45 +-
 .../groupby_grouping_sets_grouping.q            |  36 ++
 .../vector_groupby_grouping_sets_grouping.q     |  36 ++
 .../groupby_grouping_sets_grouping.q.out        | 366 ++++++++++++++++-
 .../vector_groupby_grouping_sets_grouping.q.out | 410 ++++++++++++++++++-
 .../results/clientpositive/perf/query36.q.out   |   4 +-
 .../results/clientpositive/perf/query70.q.out   |   4 +-
 .../results/clientpositive/perf/query86.q.out   |   4 +-
 9 files changed, 903 insertions(+), 82 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/1a1e8357/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index f2a6ade..b2e1c88 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -53,7 +53,6 @@ import org.antlr.runtime.tree.TreeWizard;
 import org.antlr.runtime.tree.TreeWizard.ContextVisitor;
 import org.apache.calcite.rel.RelNode;
 import org.apache.commons.lang.StringUtils;
-import org.apache.commons.lang.mutable.MutableBoolean;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -3062,8 +3061,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
 
   protected static ASTNode rewriteGroupingFunctionAST(final List<ASTNode> grpByAstExprs, ASTNode targetNode,
       final boolean noneSet) throws SemanticException {
-    final MutableBoolean visited = new MutableBoolean(false);
-    final MutableBoolean found = new MutableBoolean(false);
 
     TreeVisitorAction action = new TreeVisitorAction() {
 
@@ -3075,45 +3072,62 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       @Override
       public Object post(Object t) {
         ASTNode root = (ASTNode) t;
-        if (root.getType() == HiveParser.TOK_FUNCTION && root.getChildCount() == 2) {
+        if (root.getType() == HiveParser.TOK_FUNCTION) {
           ASTNode func = (ASTNode) ParseDriver.adaptor.getChild(root, 0);
-          if (func.getText().equals("grouping")) {
-            ASTNode c = (ASTNode) ParseDriver.adaptor.getChild(root, 1);
-            visited.setValue(true);
-            for (int i = 0; i < grpByAstExprs.size(); i++) {
-              ASTNode grpByExpr = grpByAstExprs.get(i);
-              if (grpByExpr.toStringTree().equals(c.toStringTree())) {
-                ASTNode child1;
-                if (noneSet) {
-                  // Query does not contain CUBE, ROLLUP, or GROUPING SETS, and thus,
-                  // grouping should return 0
-                  child1 = (ASTNode) ParseDriver.adaptor.create(HiveParser.IntegralLiteral,
-                      String.valueOf(0));
-                } else {
-                  // We refer to grouping_id column
-                  child1 = (ASTNode) ParseDriver.adaptor.create(
-                      HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL");
-                  ParseDriver.adaptor.addChild(child1, ParseDriver.adaptor.create(
-                      HiveParser.Identifier, VirtualColumn.GROUPINGID.getName()));
+          if (func.getText().equals("grouping") && func.getChildCount() == 0) {
+            int numberOperands = ParseDriver.adaptor.getChildCount(root);
+            // We implement this logic using replaceChildren instead of replacing
+            // the root node itself because windowing logic stores multiple
+            // pointers to the AST, and replacing root might lead to some pointers
+            // leading to non-rewritten version
+            ASTNode newRoot = new ASTNode();
+            // Rewritten grouping function
+            ASTNode groupingFunc = (ASTNode) ParseDriver.adaptor.create(
+                    HiveParser.Identifier, "grouping");
+            ParseDriver.adaptor.addChild(groupingFunc, ParseDriver.adaptor.create(
+                    HiveParser.Identifier, "rewritten"));
+            newRoot.addChild(groupingFunc);
+            // Grouping ID reference
+            ASTNode childGroupingID;
+            if (noneSet) {
+              // Query does not contain CUBE, ROLLUP, or GROUPING SETS, and thus,
+              // grouping should return 0
+              childGroupingID = (ASTNode) ParseDriver.adaptor.create(HiveParser.IntegralLiteral,
+                      String.valueOf(0));
+            } else {
+              // We refer to grouping_id column
+              childGroupingID = (ASTNode) ParseDriver.adaptor.create(
+                      HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL");
+              ParseDriver.adaptor.addChild(childGroupingID, ParseDriver.adaptor.create(
+                      HiveParser.Identifier, VirtualColumn.GROUPINGID.getName()));
+            }
+            newRoot.addChild(childGroupingID);
+            // Indices
+            for (int i = 1; i < numberOperands; i++) {
+              ASTNode c = (ASTNode) ParseDriver.adaptor.getChild(root, i);
+              for (int j = 0; j < grpByAstExprs.size(); j++) {
+                ASTNode grpByExpr = grpByAstExprs.get(j);
+                if (grpByExpr.toStringTree().equals(c.toStringTree())) {
+                  // Create and add AST node with position of grouping function input
+                  // in group by clause
+                  ASTNode childN = (ASTNode) ParseDriver.adaptor.create(HiveParser.IntegralLiteral,
+                          String.valueOf(IntMath.mod(-j-1, grpByAstExprs.size())));
+                  newRoot.addChild(childN);
+                  break;
                 }
-                ASTNode child2 = (ASTNode) ParseDriver.adaptor.create(HiveParser.IntegralLiteral,
-                    String.valueOf(IntMath.mod(-i-1, grpByAstExprs.size())));
-                root.setChild(1, child1);
-                root.addChild(child2);
-                found.setValue(true);
-                break;
               }
             }
+            if (numberOperands + 1 != ParseDriver.adaptor.getChildCount(newRoot)) {
+              throw new RuntimeException(ErrorMsg.HIVE_GROUPING_FUNCTION_EXPR_NOT_IN_GROUPBY.getMsg());
+            }
+            // Replace expression
+            root.replaceChildren(0, numberOperands - 1, newRoot);
          }
         }
         return t;
       }
     };
-    ASTNode newTargetNode = (ASTNode) new TreeVisitor(ParseDriver.adaptor).visit(targetNode, action);
-    if (visited.booleanValue() && !found.booleanValue()) {
-      throw new SemanticException(ErrorMsg.HIVE_GROUPING_FUNCTION_EXPR_NOT_IN_GROUPBY.getMsg());
-    }
-    return newTargetNode;
+    return (ASTNode) new TreeVisitor(ParseDriver.adaptor).visit(targetNode, action);
   }
 
   private Operator genPlanForSubQueryPredicate(
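
For orientation: the rewrite above turns a call grouping(e1, ..., ek) into an internal call grouping(grouping__id, p1, ..., pk), where each pi is the bit position of ei inside grouping__id. For a GROUP BY clause with n expressions, the expression at 0-based position j maps to bit IntMath.mod(-j-1, n), which equals n-1-j. A minimal standalone sketch of that arithmetic (class and method names are illustrative, not part of the patch):

    import com.google.common.math.IntMath;

    public class GroupingBitPositionSketch {
      // Bit of grouping__id that tracks the GROUP BY expression at position j
      // (0-based, left to right) when the clause has n expressions.
      // IntMath.mod(-j - 1, n) equals n - 1 - j for 0 <= j < n.
      static int bitPosition(int j, int n) {
        return IntMath.mod(-j - 1, n);
      }

      public static void main(String[] args) {
        // GROUP BY key, value (n = 2): key -> bit 1, value -> bit 0, so
        // grouping(key, value) is rewritten to grouping(grouping__id, 1, 0)
        // and grouping(value, key) to grouping(grouping__id, 0, 1),
        // matching the plans in the .q.out changes below.
        System.out.println(bitPosition(0, 2)); // 1
        System.out.println(bitPosition(1, 2)); // 0
      }
    }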
", -extended = "a is the grouping id, b is the index we want to extract") +extended = "a is the grouping id, p1...pn are the indices we want to extract") @UDFType(deterministic = true) -@NDV(maxNdv = 2) public class GenericUDFGrouping extends GenericUDF { private transient IntObjectInspector groupingIdOI; - private int index = 0; - private ByteWritable byteWritable = new ByteWritable(); + private int[] indices; + private IntWritable intWritable = new IntWritable(); @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { - if (arguments.length != 2) { + if (arguments.length < 2) { throw new UDFArgumentLengthException( - "grouping() requires 2 argument, got " + arguments.length); + "grouping() requires at least 2 argument, got " + arguments.length); } if (arguments[0].getCategory() != Category.PRIMITIVE) { @@ -64,27 +65,37 @@ public class GenericUDFGrouping extends GenericUDF { } groupingIdOI = (IntObjectInspector) arguments[0]; - PrimitiveObjectInspector arg2OI = (PrimitiveObjectInspector) arguments[1]; - if (!(arg2OI instanceof WritableConstantIntObjectInspector)) { - throw new UDFArgumentTypeException(1, "The second argument to grouping() must be a constant"); + indices = new int[arguments.length - 1]; + for (int i = 1; i < arguments.length; i++) { + PrimitiveObjectInspector arg2OI = (PrimitiveObjectInspector) arguments[i]; + if (!(arg2OI instanceof WritableConstantIntObjectInspector)) { + throw new UDFArgumentTypeException(i, "Must be a constant"); + } + indices[i - 1] = ((WritableConstantIntObjectInspector)arg2OI).getWritableConstantValue().get(); } - index = ((WritableConstantIntObjectInspector)arg2OI).getWritableConstantValue().get(); - return PrimitiveObjectInspectorFactory.writableByteObjectInspector; + return PrimitiveObjectInspectorFactory.writableIntObjectInspector; } @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { // groupingId = PrimitiveObjectInspectorUtils.getInt(arguments[0].get(), groupingIdOI); // Check that the bit at the given index is '1' or '0' - byteWritable.set((byte) - ((PrimitiveObjectInspectorUtils.getInt(arguments[0].get(), groupingIdOI) >> index) & 1)); - return byteWritable; + int result = 0; + // grouping(c1, c2, c3) + // is equivalent to + // 4 * grouping(c1) + 2 * grouping(c2) + grouping(c3) + for (int a = 1; a < arguments.length; a++) { + result += IntMath.pow(2, indices.length - a) * + ((PrimitiveObjectInspectorUtils.getInt(arguments[0].get(), groupingIdOI) >> indices[a - 1]) & 1); + } + intWritable.set(result); + return intWritable; } @Override public String getDisplayString(String[] children) { - assert (children.length == 2); + assert (children.length > 1); return getStandardDisplayString("grouping", children); } http://git-wip-us.apache.org/repos/asf/hive/blob/1a1e8357/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q index 34759ca..7157106 100644 --- a/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q +++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q @@ -119,3 +119,39 @@ select key, value from T1 group by key, value having grouping(key) = 0; + +explain +select key, value, `grouping__id`, grouping(key, value) +from T1 +group by cube(key, value); + +select key, value, `grouping__id`, 

http://git-wip-us.apache.org/repos/asf/hive/blob/1a1e8357/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
index 34759ca..7157106 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
@@ -119,3 +119,39 @@ select key, value
 from T1
 group by key, value
 having grouping(key) = 0;
+
+explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value);
+
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value);
+
+explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value);
+
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value);
+
+explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value);
+
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value);
+
+explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value);
+
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value);

http://git-wip-us.apache.org/repos/asf/hive/blob/1a1e8357/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q b/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q
index 7b7c892..b9c2a7b 100644
--- a/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q
+++ b/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q
@@ -97,3 +97,39 @@ from T1
 group by cube(key, value)
 having grouping(key) = 1 OR grouping(value) = 1
 order by x desc, case when x = 1 then key end;
+
+explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value);
+
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value);
+
+explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value);
+
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value);
+
+explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value);
+
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value);
+
+explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value);
+
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value);

http://git-wip-us.apache.org/repos/asf/hive/blob/1a1e8357/ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out b/ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out
index b82d9c2..473d17a 100644
--- a/ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out
+++ b/ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out
@@ -56,7 +56,7 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint)
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: int), grouping(_col2, 0) (type: int)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4
             Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
@@ -138,7 +138,7 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint)
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: int), grouping(_col2, 0) (type: int)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4
             Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
@@ -227,7 +227,7 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
           Filter Operator
-            predicate: (UDFToInteger(grouping(_col2, 1)) = 1) (type: boolean)
+            predicate: (grouping(_col2, 1) = 1) (type: boolean)
             Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: _col0 (type: int), _col1 (type: int)
@@ -314,10 +314,10 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
           Filter Operator
-            predicate: ((UDFToInteger(grouping(_col2, 1)) = 1) or (UDFToInteger(grouping(_col2, 0)) = 1)) (type: boolean)
+            predicate: ((grouping(_col2, 1) = 1) or (grouping(_col2, 0) = 1)) (type: boolean)
             Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: tinyint)
+              expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: int)
              outputColumnNames: _col0, _col1, _col2
              Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
              File Output Operator
@@ -332,13 +332,13 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             Reduce Output Operator
-              key expressions: _col2 (type: tinyint), CASE WHEN ((_col2 = 1)) THEN (_col0) ELSE (null) END (type: int)
+              key expressions: _col2 (type: int), CASE WHEN ((_col2 = 1)) THEN (_col0) ELSE (null) END (type: int)
               sort order: -+
               Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col0 (type: int), _col1 (type: int)
       Reduce Operator Tree:
         Select Operator
-          expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: tinyint)
+          expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: int)
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
@@ -423,7 +423,7 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint)
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: int), grouping(_col2, 0) (type: int)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4
             Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
@@ -505,7 +505,7 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint)
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: int), grouping(_col2, 0) (type: int)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4
             Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
@@ -681,7 +681,7 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: tinyint)
+            expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: int)
             outputColumnNames: _col0, _col1, _col2
             Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
@@ -696,13 +696,13 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             Reduce Output Operator
-              key expressions: _col2 (type: tinyint), CASE WHEN ((_col2 = 1)) THEN (_col0) END (type: int)
+              key expressions: _col2 (type: int), CASE WHEN ((_col2 = 1)) THEN (_col0) END (type: int)
               sort order: -+
               Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col0 (type: int), _col1 (type: int)
       Reduce Operator Tree:
         Select Operator
-          expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: tinyint)
+          expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: int)
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
@@ -787,7 +787,7 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: _col0 (type: int), _col1 (type: int), 0 (type: tinyint), 0 (type: tinyint)
+            expressions: _col0 (type: int), _col1 (type: int), 0 (type: int), 0 (type: int)
             outputColumnNames: _col0, _col1, _col2, _col3
             Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
@@ -864,7 +864,7 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: _col0 (type: int), _col1 (type: int), 0 (type: tinyint)
+            expressions: _col0 (type: int), _col1 (type: int), 0 (type: int)
             outputColumnNames: _col0, _col1, _col2
             Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
@@ -976,3 +976,341 @@ POSTHOOK: Input: default@t1
 3 3
 3 NULL
 4 5
+PREHOOK: query: explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: int), value (type: int)
+              outputColumnNames: key, value
+              Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: key (type: int), value (type: int), 0 (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                  Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1, 0) (type: int)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1 1 0 0
+1 NULL 0 0
+1 NULL 1 1
+2 2 0 0
+2 NULL 1 1
+3 3 0 0
+3 NULL 0 0
+3 NULL 1 1
+4 5 0 0
+4 NULL 1 1
+NULL 1 2 2
+NULL 2 2 2
+NULL 3 2 2
+NULL 5 2 2
+NULL NULL 2 2
+NULL NULL 3 3
+PREHOOK: query: explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: int), value (type: int)
+              outputColumnNames: key, value
+              Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: key (type: int), value (type: int), 0 (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                  Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 0, 1) (type: int)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1 1 0 0
+1 NULL 0 0
+1 NULL 1 2
+2 2 0 0
+2 NULL 1 2
+3 3 0 0
+3 NULL 0 0
+3 NULL 1 2
+4 5 0 0
+4 NULL 1 2
+NULL 1 2 1
+NULL 2 2 1
+NULL 3 2 1
+NULL 5 2 1
+NULL NULL 2 1
+NULL NULL 3 3
+PREHOOK: query: explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: int), value (type: int)
+              outputColumnNames: key, value
+              Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: key (type: int), value (type: int), 0 (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                  Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1, 0) (type: int)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1 1 0 0
+1 NULL 0 0
+1 NULL 1 1
+2 2 0 0
+2 NULL 1 1
+3 3 0 0
+3 NULL 0 0
+3 NULL 1 1
+4 5 0 0
+4 NULL 1 1
+NULL NULL 3 3
+PREHOOK: query: explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: int), value (type: int)
+              outputColumnNames: key, value
+              Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: key (type: int), value (type: int), 0 (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                  Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 0, 1) (type: int)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1 1 0 0
+1 NULL 0 0
+1 NULL 1 2
+2 2 0 0
+2 NULL 1 2
+3 3 0 0
+3 NULL 0 0
+3 NULL 1 2
+4 5 0 0
+4 NULL 1 2
+NULL NULL 3 3
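
A quick sanity check of the result columns above: for the row with grouping__id = 1 (binary 01, so value is aggregated and key is not), grouping(key, value) = 2*0 + 1 = 1 while grouping(value, key) = 2*1 + 0 = 2; for grouping__id = 2 (binary 10) the two calls give 2 and 1 respectively, and for grouping__id = 3 both give 3. The LLAP/vectorized variant of the same tests follows.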

http://git-wip-us.apache.org/repos/asf/hive/blob/1a1e8357/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out
index 8696207..5af9e61 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out
@@ -80,7 +80,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint)
+                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: int), grouping(_col2, 0) (type: int)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
                   Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
@@ -174,7 +174,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint)
+                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: int), grouping(_col2, 0) (type: int)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
                   Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
@@ -275,7 +275,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
-                  predicate: (UDFToInteger(grouping(_col2, 1)) = 1) (type: boolean)
+                  predicate: (grouping(_col2, 1) = 1) (type: boolean)
                   Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: _col0 (type: int), _col1 (type: int)
@@ -374,14 +374,14 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
-                  predicate: ((UDFToInteger(grouping(_col2, 1)) = 1) or (UDFToInteger(grouping(_col2, 0)) = 1)) (type: boolean)
+                  predicate: ((grouping(_col2, 1) = 1) or (grouping(_col2, 0) = 1)) (type: boolean)
                   Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: tinyint)
+                    expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: int)
                     outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: _col2 (type: tinyint), CASE WHEN ((_col2 = 1)) THEN (_col0) ELSE (null) END (type: int)
+                      key expressions: _col2 (type: int), CASE WHEN ((_col2 = 1)) THEN (_col0) ELSE (null) END (type: int)
                       sort order: -+
                       Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: int), _col1 (type: int)
@@ -389,7 +389,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Select Operator
-                expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: tinyint)
+                expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: int)
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
@@ -486,7 +486,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint)
+                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: int), grouping(_col2, 0) (type: int)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
                   Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
@@ -580,7 +580,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint)
+                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: int), grouping(_col2, 0) (type: int)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
                   Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
@@ -780,11 +780,11 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: tinyint)
+                  expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: int)
                   outputColumnNames: _col0, _col1, _col2
                   Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
-                    key expressions: _col2 (type: tinyint), CASE WHEN ((_col2 = 1)) THEN (_col0) END (type: int)
+                    key expressions: _col2 (type: int), CASE WHEN ((_col2 = 1)) THEN (_col0) END (type: int)
                     sort order: -+
                     Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col0 (type: int), _col1 (type: int)
@@ -792,7 +792,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             Reduce Operator Tree:
              Select Operator
-                expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: tinyint)
+                expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: int)
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
@@ -836,3 +836,389 @@ NULL 3 1
 NULL 5 1
 NULL NULL 1
 NULL NULL 2
+PREHOOK: query: explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value)
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: key (type: int), value (type: int), 0 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                        Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1, 0) (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+key value grouping__id _c3
+1 1 0 0
+1 NULL 0 0
+1 NULL 1 1
+2 2 0 0
+2 NULL 1 1
+3 3 0 0
+3 NULL 0 0
+3 NULL 1 1
+4 5 0 0
+4 NULL 1 1
+NULL 1 2 2
+NULL 2 2 2
+NULL 3 2 2
+NULL 5 2 2
+NULL NULL 2 2
+NULL NULL 3 3
+PREHOOK: query: explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value)
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: key (type: int), value (type: int), 0 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                        Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 0, 1) (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+key value grouping__id _c3
+1 1 0 0
+1 NULL 0 0
+1 NULL 1 2
+2 2 0 0
+2 NULL 1 2
+3 3 0 0
+3 NULL 0 0
+3 NULL 1 2
+4 5 0 0
+4 NULL 1 2
+NULL 1 2 1
+NULL 2 2 1
+NULL 3 2 1
+NULL 5 2 1
+NULL NULL 2 1
+NULL NULL 3 3
+PREHOOK: query: explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value)
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: key (type: int), value (type: int), 0 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 18 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                        Statistics: Num rows: 18 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1, 0) (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+key value grouping__id _c3
+1 1 0 0
+1 NULL 0 0
+1 NULL 1 1
+2 2 0 0
+2 NULL 1 1
+3 3 0 0
+3 NULL 0 0
+3 NULL 1 1
+4 5 0 0
+4 NULL 1 1
+NULL NULL 3 3
+PREHOOK: query: explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value)
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: key (type: int), value (type: int), 0 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 18 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                        Statistics: Num rows: 18 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 0, 1) (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+key value grouping__id _c3
+1 1 0 0
+1 NULL 0 0
+1 NULL 1 2
+2 2 0 0
+2 NULL 1 2
+3 3 0 0
+3 NULL 0 0
+3 NULL 1 2
+4 5 0 0
+4 NULL 1 2
+NULL NULL 3 3
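
The three TPC-DS plan updates below (query36, query70, query86) are mechanical fallout of the return-type change: grouping() now returns int instead of tinyint, so the UDFToInteger cast around grouping(_colN, 0) drops out of the windowing partition expressions.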

http://git-wip-us.apache.org/repos/asf/hive/blob/1a1e8357/ql/src/test/results/clientpositive/perf/query36.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query36.q.out b/ql/src/test/results/clientpositive/perf/query36.q.out
index b356628..57ab26a 100644
--- a/ql/src/test/results/clientpositive/perf/query36.q.out
+++ b/ql/src/test/results/clientpositive/perf/query36.q.out
@@ -81,12 +81,12 @@ Stage-0
         Select Operator [SEL_30] (rows=1149975358 width=88)
           Output:["_col0","_col1","_col2","_col3","_col4"]
           PTF Operator [PTF_29] (rows=1149975358 width=88)
-            Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(_col4 / _col5) ASC NULLS FIRST","partition by:":"(grouping(_col6, 1) + grouping(_col6, 0)), CASE WHEN ((UDFToInteger(grouping(_col6, 0)) = 0)) THEN (_col0) ELSE (null) END"}]
+            Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(_col4 / _col5) ASC NULLS FIRST","partition by:":"(grouping(_col6, 1) + grouping(_col6, 0)), CASE WHEN ((grouping(_col6, 0) = 0)) THEN (_col0) ELSE (null) END"}]
             Select Operator [SEL_28] (rows=1149975358 width=88)
               Output:["_col0","_col1","_col4","_col5","_col6"]
             <-Reducer 5 [SIMPLE_EDGE]
               SHUFFLE [RS_27]
-                PartitionCols:(grouping(_col6, 1) + grouping(_col6, 0)), CASE WHEN ((UDFToInteger(grouping(_col6, 0)) = 0)) THEN (_col0) ELSE (null) END
+                PartitionCols:(grouping(_col6, 1) + grouping(_col6, 0)), CASE WHEN ((grouping(_col6, 0) = 0)) THEN (_col0) ELSE (null) END
                 Select Operator [SEL_26] (rows=1149975358 width=88)
                   Output:["_col0","_col1","_col4","_col5","_col6"]
                   Group By Operator [GBY_25] (rows=1149975358 width=88)

http://git-wip-us.apache.org/repos/asf/hive/blob/1a1e8357/ql/src/test/results/clientpositive/perf/query70.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query70.q.out b/ql/src/test/results/clientpositive/perf/query70.q.out
index bf90cdd..96fba40 100644
--- a/ql/src/test/results/clientpositive/perf/query70.q.out
+++ b/ql/src/test/results/clientpositive/perf/query70.q.out
@@ -101,12 +101,12 @@ Stage-0
         Select Operator [SEL_57] (rows=1149975358 width=88)
           Output:["_col0","_col1","_col2","_col3","_col4"]
           PTF Operator [PTF_56] (rows=1149975358 width=88)
-            Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 DESC NULLS LAST","partition by:":"(grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((UDFToInteger(grouping(_col5, 0)) = 0)) THEN (_col0) ELSE (null) END"}]
+            Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 DESC NULLS LAST","partition by:":"(grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (null) END"}]
             Select Operator [SEL_55] (rows=1149975358 width=88)
               Output:["_col0","_col1","_col4","_col5"]
             <-Reducer 5 [SIMPLE_EDGE]
               SHUFFLE [RS_54]
-                PartitionCols:(grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((UDFToInteger(grouping(_col5, 0)) = 0)) THEN (_col0) ELSE (null) END
+                PartitionCols:(grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (null) END
                 Select Operator [SEL_53] (rows=1149975358 width=88)
                   Output:["_col0","_col1","_col4","_col5"]
                   Group By Operator [GBY_52] (rows=1149975358 width=88)

http://git-wip-us.apache.org/repos/asf/hive/blob/1a1e8357/ql/src/test/results/clientpositive/perf/query86.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query86.q.out b/ql/src/test/results/clientpositive/perf/query86.q.out
index 6377c43..734e6a4 100644
--- a/ql/src/test/results/clientpositive/perf/query86.q.out
+++ b/ql/src/test/results/clientpositive/perf/query86.q.out
@@ -72,12 +72,12 @@ Stage-0
         Select Operator [SEL_24] (rows=261364852 width=135)
           Output:["_col0","_col1","_col2","_col3","_col4"]
           PTF Operator [PTF_23] (rows=261364852 width=135)
-            Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 DESC NULLS LAST","partition by:":"(grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((UDFToInteger(grouping(_col5, 0)) = 0)) THEN (_col0) ELSE (null) END"}]
+            Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 DESC NULLS LAST","partition by:":"(grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (null) END"}]
             Select Operator [SEL_22] (rows=261364852 width=135)
               Output:["_col0","_col1","_col4","_col5"]
             <-Reducer 4 [SIMPLE_EDGE]
               SHUFFLE [RS_21]
-                PartitionCols:(grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((UDFToInteger(grouping(_col5, 0)) = 0)) THEN (_col0) ELSE (null) END
+                PartitionCols:(grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (null) END
                 Select Operator [SEL_20] (rows=261364852 width=135)
                   Output:["_col0","_col1","_col4","_col5"]
                   Group By Operator [GBY_19] (rows=261364852 width=135)