Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id B978B18FEC for ; Wed, 24 Jun 2015 21:14:36 +0000 (UTC) Received: (qmail 26185 invoked by uid 500); 24 Jun 2015 21:14:36 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 26138 invoked by uid 500); 24 Jun 2015 21:14:36 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 26127 invoked by uid 99); 24 Jun 2015 21:14:36 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 24 Jun 2015 21:14:36 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 72C5EE35E5; Wed, 24 Jun 2015 21:14:36 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: jdere@apache.org To: commits@hive.apache.org Message-Id: <3bc0b9311415495ebceada553bf63eb4@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: hive git commit: HIVE-9248: Vectorization : Tez Reduce vertex not getting vectorized when GROUP BY is Hash mode (Matt McCline via Jason Dere) Date: Wed, 24 Jun 2015 21:14:36 +0000 (UTC) Repository: hive Updated Branches: refs/heads/master c8a41f1ca -> ebd294a94 HIVE-9248: Vectorization : Tez Reduce vertex not getting vectorized when GROUP BY is Hash mode (Matt McCline via Jason Dere) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ebd294a9 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ebd294a9 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ebd294a9 Branch: refs/heads/master Commit: ebd294a94c797fa01af5523f95830e465fea57b4 Parents: c8a41f1 Author: Jason Dere Authored: Wed Jun 24 14:13:53 2015 -0700 Committer: Jason Dere Committed: Wed Jun 24 14:13:53 2015 -0700 ---------------------------------------------------------------------- .../ql/exec/vector/VectorGroupByOperator.java | 16 +- .../ql/exec/vector/VectorizationContext.java | 14 +- .../hive/ql/optimizer/physical/Vectorizer.java | 147 ++++++++++--------- .../hadoop/hive/ql/plan/VectorGroupByDesc.java | 23 +-- .../exec/vector/TestVectorGroupByOperator.java | 2 +- .../clientpositive/vector_groupby_reduce.q | 3 + .../tez/vector_count_distinct.q.out | 1 + .../tez/vector_groupby_reduce.q.out | 10 +- .../tez/vectorized_distinct_gby.q.out | 1 + .../vectorized_dynamic_partition_pruning.q.out | 8 + .../clientpositive/vector_groupby_reduce.q.out | 10 +- 11 files changed, 133 insertions(+), 102 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/ebd294a9/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 39a83e3..917f406 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -675,7 +675,7 @@ public class VectorGroupByOperator extends Operator implements * writeGroupRow does this and finally increments outputBatch.size. * */ - private class ProcessingModeGroupBatches extends ProcessingModeBase { + private class ProcessingModeReduceMergePartialKeys extends ProcessingModeBase { private boolean inGroup; private boolean first; @@ -761,7 +761,8 @@ public class VectorGroupByOperator extends Operator implements aggregators = new VectorAggregateExpression[aggrDesc.size()]; for (int i = 0; i < aggrDesc.size(); ++i) { AggregationDesc aggDesc = aggrDesc.get(i); - aggregators[i] = vContext.getAggregatorExpression(aggDesc, desc.getVectorDesc().isReduce()); + aggregators[i] = + vContext.getAggregatorExpression(aggDesc, desc.getVectorDesc().isReduceMergePartial()); } isVectorOutput = desc.getVectorDesc().isVectorOutput(); @@ -803,8 +804,8 @@ public class VectorGroupByOperator extends Operator implements objectInspectors.add(aggregators[i].getOutputObjectInspector()); } - if (!conf.getVectorDesc().isVectorGroupBatches()) { - // These data structures are only used by the map-side processing modes. + if (outputKeyLength > 0 && !conf.getVectorDesc().isReduceMergePartial()) { + // These data structures are only used by the non Reduce Merge-Partial Keys processing modes. keyWrappersBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions); aggregationBatchInfo = new VectorAggregationBufferBatch(); aggregationBatchInfo.compileAggregationBatchInfo(aggregators); @@ -830,10 +831,11 @@ public class VectorGroupByOperator extends Operator implements forwardCache = new Object[outputKeyLength + aggregators.length]; if (outputKeyLength == 0) { - processingMode = this.new ProcessingModeGlobalAggregate(); - } else if (conf.getVectorDesc().isVectorGroupBatches()) { + // Hash and MergePartial global aggregation are both handled here. + processingMode = this.new ProcessingModeGlobalAggregate(); + } else if (conf.getVectorDesc().isReduceMergePartial()) { // Sorted GroupBy of vector batches where an individual batch has the same group key (e.g. reduce). - processingMode = this.new ProcessingModeGroupBatches(); + processingMode = this.new ProcessingModeReduceMergePartialKeys(); } else { // We start in hash mode and may dynamically switch to unsorted stream mode. processingMode = this.new ProcessingModeHashAggregate(); http://git-wip-us.apache.org/repos/asf/hive/blob/ebd294a9/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 392e56d..8fbf064 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -2138,7 +2138,7 @@ public class VectorizationContext { add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdSampDecimal.class)); }}; - public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, boolean isReduce) + public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, boolean isReduceMergePartial) throws HiveException { ArrayList paramDescList = desc.getParameters(); @@ -2166,11 +2166,11 @@ public class VectorizationContext { inputType == VectorExpressionDescriptor.ArgumentType.NONE) || (aggDef.getType().isSameTypeOrFamily(inputType)))) { - if (aggDef.getMode() == GroupByDesc.Mode.HASH && isReduce) { - continue; - } else if (aggDef.getMode() == GroupByDesc.Mode.MERGEPARTIAL && !isReduce) { - continue; - } + if (aggDef.getMode() == GroupByDesc.Mode.HASH && isReduceMergePartial) { + continue; + } else if (aggDef.getMode() == GroupByDesc.Mode.MERGEPARTIAL && !isReduceMergePartial) { + continue; + } Class aggClass = aggDef.getAggClass(); try @@ -2189,7 +2189,7 @@ public class VectorizationContext { } throw new HiveException("Vector aggregate not implemented: \"" + aggregateName + - "\" for type: \"" + inputType.name() + " (reduce-side = " + isReduce + ")"); + "\" for type: \"" + inputType.name() + " (reduce-merge-partial = " + isReduceMergePartial + ")"); } public int firstOutputColumnIndex() { http://git-wip-us.apache.org/repos/asf/hive/blob/ebd294a9/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index e7b9c73..6e86d69 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -837,11 +837,6 @@ public class Vectorizer implements PhysicalPlanResolver { LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + " added vectorization context " + vNewContext.toString()); } } - if (vectorOp instanceof VectorGroupByOperator) { - VectorGroupByOperator groupBy = (VectorGroupByOperator) vectorOp; - VectorGroupByDesc vectorDesc = groupBy.getConf().getVectorDesc(); - vectorDesc.setVectorGroupBatches(true); - } if (saveRootVectorOp && op != vectorOp) { rootVectorOp = vectorOp; } @@ -1127,42 +1122,67 @@ public class Vectorizer implements PhysicalPlanResolver { LOG.info("Cannot vectorize groupby key expression"); return false; } - ret = validateAggregationDesc(desc.getAggregators(), isReduce); - if (!ret) { - LOG.info("Cannot vectorize groupby aggregate expression"); - return false; - } - if (isReduce) { - if (desc.isDistinct()) { - LOG.info("Distinct not supported in reduce vector mode"); - return false; - } - // Sort-based GroupBy? - if (desc.getMode() != GroupByDesc.Mode.COMPLETE && - desc.getMode() != GroupByDesc.Mode.PARTIAL1 && - desc.getMode() != GroupByDesc.Mode.PARTIAL2 && - desc.getMode() != GroupByDesc.Mode.MERGEPARTIAL) { - LOG.info("Reduce vector mode not supported when input for GROUP BY not sorted"); - return false; - } - LOG.info("Reduce GROUP BY mode is " + desc.getMode().name()); - if (!aggregatorsOutputIsPrimitive(desc.getAggregators(), isReduce)) { - LOG.info("Reduce vector mode only supported when aggregate outputs are primitive types"); + + boolean isMergePartial = (desc.getMode() != GroupByDesc.Mode.HASH); + + if (!isReduce) { + + // MapWork + + ret = validateHashAggregationDesc(desc.getAggregators()); + if (!ret) { return false; } - if (desc.getKeys().size() > 0) { - if (op.getParentOperators().size() > 0) { - LOG.info("Reduce vector mode can only handle a key group GROUP BY operator when it is fed by reduce-shuffle"); + } else { + + // ReduceWork + + if (isMergePartial) { + + // Reduce Merge-Partial GROUP BY. + + // A merge-partial GROUP BY is fed by grouping by keys from reduce-shuffle. It is the + // first (or root) operator for its reduce task. + + if (desc.isDistinct()) { + LOG.info("Vectorized Reduce MergePartial GROUP BY does not support DISTINCT"); return false; } - LOG.info("Reduce-side GROUP BY will process key groups"); - vectorDesc.setVectorGroupBatches(true); + + boolean hasKeys = (desc.getKeys().size() > 0); + + // Do we support merge-partial aggregation AND the output is primitive? + ret = validateReduceMergePartialAggregationDesc(desc.getAggregators(), hasKeys); + if (!ret) { + return false; + } + + if (hasKeys) { + if (op.getParentOperators().size() > 0) { + LOG.info("Vectorized Reduce MergePartial GROUP BY keys can only handle a key group when it is fed by reduce-shuffle"); + return false; + } + + LOG.info("Vectorized Reduce MergePartial GROUP BY will process key groups"); + + // Primitive output validation above means we can output VectorizedRowBatch to the + // children operators. + vectorDesc.setVectorOutput(true); + } else { + LOG.info("Vectorized Reduce MergePartial GROUP BY will do global aggregation"); + } + vectorDesc.setIsReduceMergePartial(true); } else { - LOG.info("Reduce-side GROUP BY will do global aggregation"); + + // Reduce Hash GROUP BY or global aggregation. + + ret = validateHashAggregationDesc(desc.getAggregators()); + if (!ret) { + return false; + } } - vectorDesc.setVectorOutput(true); - vectorDesc.setIsReduce(true); } + return true; } @@ -1185,9 +1205,18 @@ public class Vectorizer implements PhysicalPlanResolver { return true; } - private boolean validateAggregationDesc(List descs, boolean isReduce) { + + private boolean validateHashAggregationDesc(List descs) { + return validateAggregationDesc(descs, /* isReduceMergePartial */ false, false); + } + + private boolean validateReduceMergePartialAggregationDesc(List descs, boolean hasKeys) { + return validateAggregationDesc(descs, /* isReduceMergePartial */ true, hasKeys); + } + + private boolean validateAggregationDesc(List descs, boolean isReduceMergePartial, boolean hasKeys) { for (AggregationDesc d : descs) { - boolean ret = validateAggregationDesc(d, isReduce); + boolean ret = validateAggregationDesc(d, isReduceMergePartial, hasKeys); if (!ret) { return false; } @@ -1264,7 +1293,14 @@ public class Vectorizer implements PhysicalPlanResolver { } } - private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isReduce) { + private boolean validateAggregationIsPrimitive(VectorAggregateExpression vectorAggrExpr) { + ObjectInspector outputObjInspector = vectorAggrExpr.getOutputObjectInspector(); + return (outputObjInspector.getCategory() == ObjectInspector.Category.PRIMITIVE); + } + + private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isReduceMergePartial, + boolean hasKeys) { + String udfName = aggDesc.getGenericUDAFName().toLowerCase(); if (!supportedAggregationUdfs.contains(udfName)) { LOG.info("Cannot vectorize groupby aggregate expression: UDF " + udfName + " not supported"); @@ -1274,47 +1310,24 @@ public class Vectorizer implements PhysicalPlanResolver { LOG.info("Cannot vectorize groupby aggregate expression: UDF parameters not supported"); return false; } - // See if we can vectorize the aggregation. - try { - VectorizationContext vc = new ValidatorVectorizationContext(); - if (vc.getAggregatorExpression(aggDesc, isReduce) == null) { - // TODO: this cannot happen - VectorizationContext throws in such cases. - LOG.info("getAggregatorExpression returned null"); - return false; - } - } catch (Exception e) { - LOG.info("Failed to vectorize", e); - return false; - } - return true; - } - - private boolean aggregatorsOutputIsPrimitive(List descs, boolean isReduce) { - for (AggregationDesc d : descs) { - boolean ret = aggregatorsOutputIsPrimitive(d, isReduce); - if (!ret) { - return false; - } - } - return true; - } - private boolean aggregatorsOutputIsPrimitive(AggregationDesc aggDesc, boolean isReduce) { + // See if we can vectorize the aggregation. VectorizationContext vc = new ValidatorVectorizationContext(); VectorAggregateExpression vectorAggrExpr; try { - vectorAggrExpr = vc.getAggregatorExpression(aggDesc, isReduce); + vectorAggrExpr = vc.getAggregatorExpression(aggDesc, isReduceMergePartial); } catch (Exception e) { // We should have already attempted to vectorize in validateAggregationDesc. LOG.info("Vectorization of aggreation should have succeeded ", e); return false; } - ObjectInspector outputObjInspector = vectorAggrExpr.getOutputObjectInspector(); - if (outputObjInspector.getCategory() == ObjectInspector.Category.PRIMITIVE) { - return true; + if (isReduceMergePartial && hasKeys && !validateAggregationIsPrimitive(vectorAggrExpr)) { + LOG.info("Vectorized Reduce MergePartial GROUP BY keys can only handle aggregate outputs that are primitive types"); + return false; } - return false; + + return true; } private boolean validateDataType(String type, VectorExpressionDescriptor.Mode mode) { http://git-wip-us.apache.org/repos/asf/hive/blob/ebd294a9/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java index b92c38b..7e791f2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java @@ -30,30 +30,21 @@ public class VectorGroupByDesc extends AbstractVectorDesc { private static long serialVersionUID = 1L; - private boolean isReduce; - private boolean isVectorGroupBatches; + private boolean isReduceMergePartial; + private boolean isVectorOutput; public VectorGroupByDesc() { - this.isReduce = false; - this.isVectorGroupBatches = false; + this.isReduceMergePartial = false; this.isVectorOutput = false; } - public boolean isReduce() { - return isReduce; - } - - public void setIsReduce(boolean isReduce) { - this.isReduce = isReduce; - } - - public boolean isVectorGroupBatches() { - return isVectorGroupBatches; + public boolean isReduceMergePartial() { + return isReduceMergePartial; } - public void setVectorGroupBatches(boolean isVectorGroupBatches) { - this.isVectorGroupBatches = isVectorGroupBatches; + public void setIsReduceMergePartial(boolean isReduceMergePartial) { + this.isReduceMergePartial = isReduceMergePartial; } public boolean isVectorOutput() { http://git-wip-us.apache.org/repos/asf/hive/blob/ebd294a9/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java index 8c84f30..fdcf103 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java @@ -2268,7 +2268,7 @@ public class TestVectorGroupByOperator { GroupByDesc desc = buildGroupByDescType(ctx, "count", "A", TypeInfoFactory.longTypeInfo); VectorGroupByDesc vectorDesc = desc.getVectorDesc(); - vectorDesc.setIsReduce(true); + vectorDesc.setIsReduceMergePartial(true); VectorGroupByOperator vgo = new VectorGroupByOperator(ctx, desc); http://git-wip-us.apache.org/repos/asf/hive/blob/ebd294a9/ql/src/test/queries/clientpositive/vector_groupby_reduce.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_groupby_reduce.q b/ql/src/test/queries/clientpositive/vector_groupby_reduce.q index a6b6b1b..1438c29 100644 --- a/ql/src/test/queries/clientpositive/vector_groupby_reduce.q +++ b/ql/src/test/queries/clientpositive/vector_groupby_reduce.q @@ -105,6 +105,9 @@ from group by ss_ticket_number limit 20; +-- The Reduce task has 2 MergePartial GROUP BY operators in a row. Currently, +-- we don't issue startGroup with keys out of the 1st vectorized GROUP BY, so we can't +-- vectorize the 2nd GROUP BY... explain select min(ss_ticket_number) http://git-wip-us.apache.org/repos/asf/hive/blob/ebd294a9/ql/src/test/results/clientpositive/tez/vector_count_distinct.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_count_distinct.q.out b/ql/src/test/results/clientpositive/tez/vector_count_distinct.q.out index e6d34ff..95863a3 100644 --- a/ql/src/test/results/clientpositive/tez/vector_count_distinct.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_count_distinct.q.out @@ -1287,6 +1287,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Execution mode: vectorized Reducer 3 Reduce Operator Tree: Group By Operator http://git-wip-us.apache.org/repos/asf/hive/blob/ebd294a9/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out b/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out index 637bb3b..ba2fb77 100644 --- a/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out @@ -321,7 +321,10 @@ POSTHOOK: Input: default@store_sales 18 19 20 -PREHOOK: query: explain +PREHOOK: query: -- The Reduce task has 2 MergePartial GROUP BY operators in a row. Currently, +-- we don't issue startGroup with keys out of the 1st vectorized GROUP BY, so we can't +-- vectorize the 2nd GROUP BY... +explain select min(ss_ticket_number) from @@ -333,7 +336,10 @@ from group by ss_ticket_number limit 20 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The Reduce task has 2 MergePartial GROUP BY operators in a row. Currently, +-- we don't issue startGroup with keys out of the 1st vectorized GROUP BY, so we can't +-- vectorize the 2nd GROUP BY... +explain select min(ss_ticket_number) from http://git-wip-us.apache.org/repos/asf/hive/blob/ebd294a9/ql/src/test/results/clientpositive/tez/vectorized_distinct_gby.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vectorized_distinct_gby.q.out b/ql/src/test/results/clientpositive/tez/vectorized_distinct_gby.q.out index 932b175..23798e0 100644 --- a/ql/src/test/results/clientpositive/tez/vectorized_distinct_gby.q.out +++ b/ql/src/test/results/clientpositive/tez/vectorized_distinct_gby.q.out @@ -138,6 +138,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: struct), _col3 (type: struct) + Execution mode: vectorized Reducer 3 Reduce Operator Tree: Group By Operator http://git-wip-us.apache.org/repos/asf/hive/blob/ebd294a9/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out index add8a65..b253508 100644 --- a/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out @@ -2870,6 +2870,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE Target column: ds Target Vertex: Map 1 + Execution mode: vectorized Reducer 8 Reduce Operator Tree: Group By Operator @@ -2905,6 +2906,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE Target column: ds Target Vertex: Map 1 + Execution mode: vectorized Union 6 Vertex: Union 6 @@ -3070,6 +3072,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE Target column: ds Target Vertex: Map 1 + Execution mode: vectorized Reducer 8 Reduce Operator Tree: Group By Operator @@ -3105,6 +3108,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE Target column: ds Target Vertex: Map 1 + Execution mode: vectorized Union 6 Vertex: Union 6 @@ -3277,6 +3281,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE Target column: ds Target Vertex: Map 5 + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Group By Operator @@ -3370,6 +3375,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE Target column: ds Target Vertex: Map 5 + Execution mode: vectorized Union 3 Vertex: Union 3 Union 9 @@ -4962,6 +4968,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE Target column: ds Target Vertex: Map 1 + Execution mode: vectorized Reducer 7 Reduce Operator Tree: Group By Operator @@ -4997,6 +5004,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE Target column: ds Target Vertex: Map 1 + Execution mode: vectorized Union 5 Vertex: Union 5 http://git-wip-us.apache.org/repos/asf/hive/blob/ebd294a9/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out b/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out index fe79b9c..edcb0b3 100644 --- a/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out +++ b/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out @@ -314,7 +314,10 @@ POSTHOOK: Input: default@store_sales 18 19 20 -PREHOOK: query: explain +PREHOOK: query: -- The Reduce task has 2 MergePartial GROUP BY operators in a row. Currently, +-- we don't issue startGroup with keys out of the 1st vectorized GROUP BY, so we can't +-- vectorize the 2nd GROUP BY... +explain select min(ss_ticket_number) from @@ -326,7 +329,10 @@ from group by ss_ticket_number limit 20 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The Reduce task has 2 MergePartial GROUP BY operators in a row. Currently, +-- we don't issue startGroup with keys out of the 1st vectorized GROUP BY, so we can't +-- vectorize the 2nd GROUP BY... +explain select min(ss_ticket_number) from