hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From harisan...@apache.org
Subject hive git commit: HIVE-11211 : Reset the fields in JoinStatsRule in StatsRulesProcFactory (Pengcheng Xiong, reviewed by Laljo John Pullokkaran)
Date Wed, 15 Jul 2015 20:15:47 GMT
Repository: hive
Updated Branches:
  refs/heads/master 4d984bded -> 423269581


HIVE-11211 : Reset the fields in JoinStatsRule in StatsRulesProcFactory (Pengcheng Xiong,
reviewed by Laljo John Pullokkaran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/42326958
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/42326958
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/42326958

Branch: refs/heads/master
Commit: 42326958148c2558be9c3d4dfe44c9e735704617
Parents: 4d984bd
Author: Hari Subramaniyan <harisankar@apache.org>
Authored: Wed Jul 15 13:15:34 2015 -0700
Committer: Hari Subramaniyan <harisankar@apache.org>
Committed: Wed Jul 15 13:15:34 2015 -0700

----------------------------------------------------------------------
 .../stats/annotation/StatsRulesProcFactory.java | 42 ++++++++++----------
 1 file changed, 22 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/42326958/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 0982059..376d42c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -1013,17 +1013,14 @@ public class StatsRulesProcFactory {
    */
   public static class JoinStatsRule extends DefaultStatsRule implements NodeProcessor {
 
-    private boolean pkfkInferred = false;
-    private long newNumRows = 0;
-    private List<Operator<? extends OperatorDesc>> parents;
-    private CommonJoinOperator<? extends JoinDesc> jop;
-    private int numAttr = 1;
 
     @Override
     public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         Object... nodeOutputs) throws SemanticException {
-      jop = (CommonJoinOperator<? extends JoinDesc>) nd;
-      parents = jop.getParentOperators();
+      long newNumRows = 0;
+      CommonJoinOperator<? extends JoinDesc> jop = (CommonJoinOperator<? extends JoinDesc>) nd;
+      List<Operator<? extends OperatorDesc>> parents = jop.getParentOperators();
+      int numAttr = 1;
       AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
       HiveConf conf = aspCtx.getConf();
       boolean allStatsAvail = true;
@@ -1062,7 +1059,7 @@ public class StatsRulesProcFactory {
           numAttr = keyExprs.size();
 
           // infer PK-FK relationship in single attribute join case
-          inferPKFKRelationship();
+          long inferredRowCount = inferPKFKRelationship(numAttr, parents, jop);
           // get the join keys from parent ReduceSink operators
           for (int pos = 0; pos < parents.size(); pos++) {
             ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos);
@@ -1149,7 +1146,7 @@ public class StatsRulesProcFactory {
 
           // update join statistics
           stats.setColumnStats(outColStats);
-          long newRowCount = pkfkInferred ? newNumRows : computeNewRowCount(rowCounts, denom);
+          long newRowCount = inferredRowCount !=-1 ? inferredRowCount : computeNewRowCount(rowCounts, denom);
           updateStatsForJoinType(stats, newRowCount, jop, rowCountParents);
           jop.setStatistics(stats);
 
@@ -1180,7 +1177,7 @@ public class StatsRulesProcFactory {
           }
 
           long maxDataSize = parentSizes.get(maxRowIdx);
-          long newNumRows = StatsUtils.safeMult(StatsUtils.safeMult(maxRowCount, (numParents - 1)), joinFactor);
+          newNumRows = StatsUtils.safeMult(StatsUtils.safeMult(maxRowCount, (numParents - 1)), joinFactor);
           long newDataSize = StatsUtils.safeMult(StatsUtils.safeMult(maxDataSize, (numParents - 1)), joinFactor);
           Statistics wcStats = new Statistics();
           wcStats.setNumRows(newNumRows);
@@ -1195,15 +1192,17 @@ public class StatsRulesProcFactory {
       return null;
     }
 
-    private void inferPKFKRelationship() {
+    private long inferPKFKRelationship(int numAttr, List<Operator<? extends OperatorDesc>> parents,
+        CommonJoinOperator<? extends JoinDesc> jop) {
+      long newNumRows = -1;
       if (numAttr == 1) {
         // If numAttr is 1, this means we join on one single key column.
         Map<Integer, ColStatistics> parentsWithPK = getPrimaryKeyCandidates(parents);
 
         // We only allow one single PK.
         if (parentsWithPK.size() != 1) {
-          LOG.debug("STATS-" + jop.toString() + ": detects multiple PK parents.");
-          return;
+          LOG.debug("STATS-" + jop.toString() + ": detects none/multiple PK parents.");
+          return newNumRows;
         }
         Integer pkPos = parentsWithPK.keySet().iterator().next();
         ColStatistics csPK = parentsWithPK.values().iterator().next();
@@ -1215,7 +1214,7 @@ public class StatsRulesProcFactory {
         // csfKs.size() + 1 == parents.size() means we have a single PK and all
         // the rest ops are FKs.
         if (csFKs.size() + 1 == parents.size()) {
-          getSelectivity(parents, pkPos, csPK, csFKs);
+          newNumRows = getCardinality(parents, pkPos, csPK, csFKs, jop);
 
           // some debug information
           if (isDebugEnabled) {
@@ -1236,16 +1235,17 @@ public class StatsRulesProcFactory {
           }
         }
       }
+      return newNumRows;
     }
 
     /**
-     * Get selectivity of reduce sink operators.
+     * Get cardinality of reduce sink operators.
      * @param csPK - ColStatistics for a single primary key
      * @param csFKs - ColStatistics for multiple foreign keys
      */
-    private void getSelectivity(List<Operator<? extends OperatorDesc>> ops, Integer pkPos, ColStatistics csPK,
-        Map<Integer, ColStatistics> csFKs) {
-      this.pkfkInferred = true;
+    private long getCardinality(List<Operator<? extends OperatorDesc>> ops, Integer pkPos,
+        ColStatistics csPK, Map<Integer, ColStatistics> csFKs,
+        CommonJoinOperator<? extends JoinDesc> jop) {
       double pkfkSelectivity = Double.MAX_VALUE;
       int fkInd = -1;
       // 1. We iterate through all the operators that have candidate FKs and
@@ -1290,13 +1290,15 @@ public class StatsRulesProcFactory {
           distinctVals.add(csFK.getCountDistint());
         }
       }
+      long newNumRows;
       if (csFKs.size() == 1) {
         // there is only one FK
-        this.newNumRows = newrows;
+        newNumRows = newrows;
       } else {
         // there is more than one FK
-        this.newNumRows = this.computeNewRowCount(rowCounts, getDenominator(distinctVals));
+        newNumRows = this.computeNewRowCount(rowCounts, getDenominator(distinctVals));
       }
+      return newNumRows;
     }
 
     private float getSelectivitySimpleTree(Operator<? extends OperatorDesc> op) {


Mime
View raw message