hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hashut...@apache.org
Subject hive git commit: HIVE-10413 : [CBO] Return path assumes distinct column cant be same as grouping column
Date Fri, 24 Apr 2015 00:33:03 GMT
Repository: hive
Updated Branches:
  refs/heads/master 124eb43a8 -> af3dea4d1


HIVE-10413 : [CBO] Return path assumes distinct column cant be same as grouping column

Signed-off-by: Ashutosh Chauhan <hashutosh@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/af3dea4d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/af3dea4d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/af3dea4d

Branch: refs/heads/master
Commit: af3dea4d1bbb74e59e32dbe081f1d10aac25c2ab
Parents: 124eb43
Author: John Pullokkaran <jpullokk@apache.org>
Authored: Thu Apr 23 15:39:00 2015 -0700
Committer: Ashutosh Chauhan <hashutosh@apache.org>
Committed: Thu Apr 23 17:27:02 2015 -0700

----------------------------------------------------------------------
 .../calcite/translator/HiveGBOpConvUtil.java    | 43 ++++++++------------
 1 file changed, 16 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/af3dea4d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
index d7f5fad..a0e2e67 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
@@ -28,7 +28,6 @@ import java.util.Set;
 
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.AggregateCall;
-import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rex.RexInputRef;
 import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -42,7 +41,6 @@ import org.apache.hadoop.hive.ql.exec.RowSchema;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
-import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID;
@@ -222,18 +220,21 @@ public class HiveGBOpConvUtil {
       }
 
       UDAFAttrs udafAttrs = new UDAFAttrs();
-      udafAttrs.udafParams.addAll(HiveCalciteUtil.getExprNodes(aggCall.getArgList(), aggInputRel,
-          inputOpAf.tabAlias));
+      List<ExprNodeDesc> argExps = HiveCalciteUtil.getExprNodes(aggCall.getArgList(), aggInputRel,
+          inputOpAf.tabAlias);
+      udafAttrs.udafParams.addAll(argExps);
       udafAttrs.udafName = aggCall.getAggregation().getName();
       udafAttrs.isDistinctUDAF = aggCall.isDistinct();
       List<Integer> argLst = new ArrayList<Integer>(aggCall.getArgList());
       List<Integer> distColIndicesOfUDAF = new ArrayList<Integer>();
       List<Integer> distUDAFParamsIndxInDistExprs = new ArrayList<Integer>();
       for (int i = 0; i < argLst.size(); i++) {
-        // NOTE: distinct expr can not be part of of GB key (we assume plan
-        // gen would have prevented it)
+        // NOTE: distinct expr can be part of of GB key
         if (udafAttrs.isDistinctUDAF) {
-          distColIndicesOfUDAF.add(distParamInRefsToOutputPos.get(argLst.get(i)));
+          ExprNodeDesc argExpr = argExps.get(i);
+          Integer found = ExprNodeDescUtils.indexOf(argExpr, gbInfo.gbKeys);
+          distColIndicesOfUDAF.add(found < 0 ? distParamInRefsToOutputPos.get(argLst.get(i)) + gbInfo.gbKeys.size() +
+              (gbInfo.grpSets.size() > 0 ? 1 : 0) : found);
           distUDAFParamsIndxInDistExprs.add(distParamInRefsToOutputPos.get(argLst.get(i)));
         } else {
           // TODO: this seems wrong (following what Hive Regular does)
@@ -648,7 +649,6 @@ public class HiveGBOpConvUtil {
     List<String> outputValueColumnNames = new ArrayList<String>();
     ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
     GroupByOperator mapGB = (GroupByOperator) inputOpAf.inputs.get(0);
-    int distColStartIndx = gbInfo.gbKeys.size() + (gbInfo.grpSets.size() > 0 ? 1 : 0);
 
     ArrayList<ExprNodeDesc> reduceKeys = getReduceKeysForRS(mapGB, 0, gbInfo.gbKeys.size() - 1,
         outputKeyColumnNames, false, colInfoLst, colExprMap, false, false);
@@ -663,14 +663,17 @@ public class HiveGBOpConvUtil {
       // NOTE: All dist cols have single output col name;
       reduceKeys.addAll(getReduceKeysForRS(mapGB, reduceKeys.size(), mapGB.getConf().getKeys()
           .size() - 1, outputKeyColumnNames, true, colInfoLst, colExprMap, false, false));
+    } else if (!gbInfo.distColIndices.isEmpty()) {
+      // This is the case where distinct cols are part of GB Keys in which case
+      // we still need to add it to out put col names
+      outputKeyColumnNames.add(SemanticAnalyzer.getColumnInternalName(reduceKeys.size()));
     }
 
     ArrayList<ExprNodeDesc> reduceValues = getValueKeysForRS(mapGB, mapGB.getConf().getKeys()
         .size(), outputValueColumnNames, colInfoLst, colExprMap, false, false);
-    List<List<Integer>> distinctColIndices = getDistColIndices(gbInfo, distColStartIndx);
 
     ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(PlanUtils
-        .getReduceSinkDesc(reduceKeys, keyLength, reduceValues, distinctColIndices,
+        .getReduceSinkDesc(reduceKeys, keyLength, reduceValues, gbInfo.distColIndices,
             outputKeyColumnNames, outputValueColumnNames, true, -1,
             getNumPartFieldsForMapSideRS(gbInfo), getParallelismForMapSideRS(gbInfo),
             AcidUtils.Operation.NOT_ACID), new RowSchema(colInfoLst), mapGB);
@@ -685,7 +688,6 @@ public class HiveGBOpConvUtil {
     List<String> outputKeyColumnNames = new ArrayList<String>();
     List<String> outputValueColumnNames = new ArrayList<String>();
     ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
-    int distColStartIndx = gbInfo.gbKeys.size() + (gbInfo.grpSets.size() > 0 ? 1 : 0);
     String outputColName;
 
     // 1. Add GB Keys to reduce keys
@@ -725,7 +727,7 @@ public class HiveGBOpConvUtil {
     // 4. Gen RS
     ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(PlanUtils
         .getReduceSinkDesc(reduceKeys, keyLength, reduceValues,
-            getDistColIndices(gbInfo, distColStartIndx), outputKeyColumnNames,
+            gbInfo.distColIndices, outputKeyColumnNames,
             outputValueColumnNames, true, -1, getNumPartFieldsForMapSideRS(gbInfo),
             getParallelismForMapSideRS(gbInfo), AcidUtils.Operation.NOT_ACID), new RowSchema(
         colInfoLst), inputOpAf.inputs.get(0));
@@ -860,6 +862,7 @@ public class HiveGBOpConvUtil {
           .get(rs.getConf().getOutputKeyColumnNames().size() - 1);
     }
     int numDistinctUDFs = 0;
+
     int distinctStartPosInReduceKeys = gbKeys.size();
     List<ExprNodeDesc> reduceValues = rs.getConf().getValueCols();
     ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
@@ -882,6 +885,7 @@ public class HiveGBOpConvUtil {
             rsDistUDAFParamName = Utilities.ReduceField.KEY.name() + "." + lastReduceKeyColName
                 + ":" + numDistinctUDFs + "." + SemanticAnalyzer.getColumnInternalName(j);
           }
+
           distinctUDAFParam = new ExprNodeColumnDesc(rsDistUDAFParamColInfo.getType(),
               rsDistUDAFParamName, rsDistUDAFParamColInfo.getTabAlias(),
               rsDistUDAFParamColInfo.getIsVirtualCol());
@@ -1215,21 +1219,6 @@ public class HiveGBOpConvUtil {
     return valueKeys;
   }
 
-  private static List<List<Integer>> getDistColIndices(GBInfo gbAttrs, int distOffSet)
-      throws SemanticException {
-    List<List<Integer>> distColIndices = new ArrayList<List<Integer>>();
-
-    for (List<Integer> udafDistCols : gbAttrs.distColIndices) {
-      List<Integer> udfAdjustedDistColIndx = new ArrayList<Integer>();
-      for (Integer distIndx : udafDistCols) {
-        udfAdjustedDistColIndx.add(distIndx + distOffSet);
-      }
-      distColIndices.add(udfAdjustedDistColIndx);
-    }
-
-    return distColIndices;
-  }
-
   // TODO: Implement this
   private static ExprNodeDesc propConstDistUDAFParams() {
     return null;


Mime
View raw message