hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rhbut...@apache.org
Subject svn commit: r1622819 - in /hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql: optimizer/optiq/ optimizer/optiq/stats/ parse/
Date Fri, 05 Sep 2014 23:45:06 GMT
Author: rhbutani
Date: Fri Sep  5 23:45:05 2014
New Revision: 1622819

URL: http://svn.apache.org/r1622819
Log:
HIVE-7915 CBO: more cost model changes (Harish Butani via John Pullokkaran)

Added:
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdRowCount.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdUniqueKeys.java
Modified:
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveDefaultRelMetadataProvider.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdSelectivity.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveDefaultRelMetadataProvider.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveDefaultRelMetadataProvider.java?rev=1622819&r1=1622818&r2=1622819&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveDefaultRelMetadataProvider.java
(original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveDefaultRelMetadataProvider.java
Fri Sep  5 23:45:05 2014
@@ -3,7 +3,9 @@ package org.apache.hadoop.hive.ql.optimi
 import com.google.common.collect.ImmutableList;
 
 import org.apache.hadoop.hive.ql.optimizer.optiq.stats.HiveRelMdDistinctRowCount;
+import org.apache.hadoop.hive.ql.optimizer.optiq.stats.HiveRelMdRowCount;
 import org.apache.hadoop.hive.ql.optimizer.optiq.stats.HiveRelMdSelectivity;
+import org.apache.hadoop.hive.ql.optimizer.optiq.stats.HiveRelMdUniqueKeys;
 import org.eigenbase.rel.metadata.ChainedRelMetadataProvider;
 import org.eigenbase.rel.metadata.DefaultRelMetadataProvider;
 import org.eigenbase.rel.metadata.RelMetadataProvider;
@@ -23,5 +25,7 @@ public class HiveDefaultRelMetadataProvi
   public static final RelMetadataProvider INSTANCE = ChainedRelMetadataProvider.of(ImmutableList
                                                        .of(HiveRelMdDistinctRowCount.SOURCE,
                                                            HiveRelMdSelectivity.SOURCE,
+                                                           HiveRelMdRowCount.SOURCE,
+                                                           HiveRelMdUniqueKeys.SOURCE,
                                                            new DefaultRelMetadataProvider()));
 }

Added: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdRowCount.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdRowCount.java?rev=1622819&view=auto
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdRowCount.java
(added)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdRowCount.java
Fri Sep  5 23:45:05 2014
@@ -0,0 +1,420 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.optiq.stats;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.List;
+import java.util.Set;
+
+import net.hydromatic.optiq.BuiltinMethod;
+import net.hydromatic.optiq.util.BitSets;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.eigenbase.rel.FilterRelBase;
+import org.eigenbase.rel.JoinRelBase;
+import org.eigenbase.rel.JoinRelType;
+import org.eigenbase.rel.ProjectRelBase;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.RelVisitor;
+import org.eigenbase.rel.TableAccessRelBase;
+import org.eigenbase.rel.metadata.ReflectiveRelMetadataProvider;
+import org.eigenbase.rel.metadata.RelMdRowCount;
+import org.eigenbase.rel.metadata.RelMetadataProvider;
+import org.eigenbase.rel.metadata.RelMetadataQuery;
+import org.eigenbase.rel.rules.SemiJoinRel;
+import org.eigenbase.relopt.RelOptUtil;
+import org.eigenbase.rex.RexBuilder;
+import org.eigenbase.rex.RexCall;
+import org.eigenbase.rex.RexInputRef;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.rex.RexUtil;
+import org.eigenbase.sql.fun.SqlStdOperatorTable;
+import org.eigenbase.util.Holder;
+import org.eigenbase.util.Pair;
+
+public class HiveRelMdRowCount extends RelMdRowCount {
+
+  protected static final Log LOG  = LogFactory.getLog(HiveRelMdRowCount.class.getName());
+
+
+  public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider
+      .reflectiveSource(BuiltinMethod.ROW_COUNT.method, new HiveRelMdRowCount());
+
+  protected HiveRelMdRowCount() {
+    super();
+  }
+
+  public Double getRowCount(JoinRelBase join) {
+    PKFKRelationInfo pkfk = analyzeJoinForPKFK(join);
+    if (pkfk != null) {
+      double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor);
+      selectivity = Math.min(1.0, selectivity);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Identified Primary - Foreign Key relation:");
+        LOG.debug(RelOptUtil.toString(join));
+        LOG.debug(pkfk);
+      }
+      return pkfk.fkInfo.rowCount * selectivity;
+    }
+    return join.getRows();
+  }
+
+  public Double getRowCount(SemiJoinRel rel) {
+    PKFKRelationInfo pkfk = analyzeJoinForPKFK(rel);
+    if (pkfk != null) {
+      double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor);
+      selectivity = Math.min(1.0, selectivity);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Identified Primary - Foreign Key relation:");
+        LOG.debug(RelOptUtil.toString(rel));
+        LOG.debug(pkfk);
+      }
+      return pkfk.fkInfo.rowCount * selectivity;
+    }
+    return super.getRowCount(rel);
+  }
+
+  static class PKFKRelationInfo {
+    public final int fkSide;
+    public final double ndvScalingFactor;
+    public final FKSideInfo fkInfo;
+    public final PKSideInfo pkInfo;
+    public final boolean isPKSideSimple;
+
+    PKFKRelationInfo(int fkSide,
+        FKSideInfo fkInfo,
+        PKSideInfo pkInfo,
+        double ndvScalingFactor,
+        boolean isPKSideSimple) {
+      this.fkSide = fkSide;
+      this.fkInfo = fkInfo;
+      this.pkInfo = pkInfo;
+      this.ndvScalingFactor = ndvScalingFactor;
+      this.isPKSideSimple = isPKSideSimple;
+    }
+
+    public String toString() {
+      return String.format(
+          "Primary - Foreign Key join:\n\tfkSide = %d\n\tFKInfo:%s\n" +
+          "\tPKInfo:%s\n\tisPKSideSimple:%s\n\tNDV Scaling Factor:%.2f\n",
+          fkSide,
+          fkInfo,
+          pkInfo,
+          isPKSideSimple,
+          ndvScalingFactor);
+    }
+  }
+
+  static class FKSideInfo {
+    public final double rowCount;
+    public final double distinctCount;
+    public FKSideInfo(double rowCount, double distinctCount) {
+      this.rowCount = rowCount;
+      this.distinctCount = distinctCount;
+    }
+
+    public String toString() {
+      return String.format("FKInfo(rowCount=%.2f,ndv=%.2f)", rowCount, distinctCount);
+    }
+  }
+
+  static class PKSideInfo extends FKSideInfo {
+    public final double selectivity;
+    public PKSideInfo(double rowCount, double distinctCount, double selectivity) {
+      super(rowCount, distinctCount);
+      this.selectivity = selectivity;
+    }
+
+    public String toString() {
+      return String.format("PKInfo(rowCount=%.2f,ndv=%.2f,selectivity=%.2f)", rowCount, distinctCount,selectivity);
+    }
+  }
+
+  /*
+   * For T1 join T2 on T1.x = T2.y if we identify 'y' s a key of T2 then we can
+   * infer the join cardinality as: rowCount(T1) * selectivity(T2) i.e this is
+   * like a SemiJoin where the T1(Fact side/FK side) is filtered by a factor
+   * based on the Selectivity of the PK/Dim table side.
+   *
+   * 1. If both T1.x and T2.y are keys then use the larger one as the PK side.
+   * 2. In case of outer Joins: a) The FK side should be the Null Preserving
+   * side. It doesn't make sense to apply this heuristic in case of Dim loj Fact
+   * or Fact roj Dim b) The selectivity factor applied on the Fact Table should
+   * be 1.
+   */
+  public static PKFKRelationInfo analyzeJoinForPKFK(JoinRelBase joinRel) {
+
+    RelNode left = joinRel.getInputs().get(0);
+    RelNode right = joinRel.getInputs().get(1);
+
+    final List<RexNode> initJoinFilters = RelOptUtil.conjunctions(joinRel
+        .getCondition());
+
+    /*
+     * No joining condition.
+     */
+    if (initJoinFilters.isEmpty()) {
+      return null;
+    }
+
+    List<RexNode> leftFilters = new ArrayList<RexNode>();
+    List<RexNode> rightFilters = new ArrayList<RexNode>();
+    List<RexNode> joinFilters = new ArrayList<RexNode>(initJoinFilters);
+    final Holder<JoinRelType> joinTypeHolder = Holder.of(joinRel.getJoinType());
+
+    // @todo: remove this. 8/28/14 hb
+    // for now adding because RelOptUtil.classifyFilters has an assertion about
+    // column counts that is not true for semiJoins.
+    if (joinRel instanceof SemiJoinRel) {
+      return null;
+    }
+
+    RelOptUtil.classifyFilters(joinRel, joinFilters, joinRel.getJoinType(),
+        false, !joinRel.getJoinType().generatesNullsOnRight(), !joinRel
+            .getJoinType().generatesNullsOnLeft(), joinFilters, leftFilters,
+        rightFilters, joinTypeHolder, false);
+
+    Pair<Integer, Integer> joinCols = canHandleJoin(joinRel, leftFilters,
+        rightFilters, joinFilters);
+    if (joinCols == null) {
+      return null;
+    }
+    int leftColIdx = joinCols.left;
+    int rightColIdx = joinCols.right;
+
+    RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder();
+    RexNode leftPred = RexUtil
+        .composeConjunction(rexBuilder, leftFilters, true);
+    RexNode rightPred = RexUtil.composeConjunction(rexBuilder, rightFilters,
+        true);
+    BitSet lBitSet = BitSets.of(leftColIdx);
+    BitSet rBitSet = BitSets.of(rightColIdx);
+
+    /*
+     * If the form is Dim loj F or Fact roj Dim or Dim semij Fact then return
+     * null.
+     */
+    boolean leftIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel
+        .getJoinType() == JoinRelType.RIGHT)
+        && !(joinRel instanceof SemiJoinRel) && isKey(lBitSet, left);
+    boolean rightIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel
+        .getJoinType() == JoinRelType.LEFT) && isKey(rBitSet, right);
+
+    if (!leftIsKey && !rightIsKey) {
+      return null;
+    }
+
+    double leftRowCount = RelMetadataQuery.getRowCount(left)
+        * RelMetadataQuery.getSelectivity(left, leftPred);
+    double rightRowCount = RelMetadataQuery.getRowCount(right)
+        * RelMetadataQuery.getSelectivity(right, rightPred);
+
+    if (leftIsKey && rightIsKey) {
+      if (rightRowCount < leftRowCount) {
+        leftIsKey = false;
+      }
+    }
+
+    int pkSide = leftIsKey ? 0 : rightIsKey ? 1 : -1;
+
+    boolean isPKSideSimpleTree = pkSide != -1 ? 
+        IsSimpleTreeOnJoinKey.check(
+            pkSide == 0 ? left : right,
+            pkSide == 0 ? leftColIdx : rightColIdx) : false;
+
+   double leftNDV = isPKSideSimpleTree ? RelMetadataQuery.getDistinctRowCount(left, lBitSet,
leftPred) : -1;
+   double rightNDV = isPKSideSimpleTree ? RelMetadataQuery.getDistinctRowCount(right, rBitSet,
rightPred) : -1;
+
+   /*
+    * If the ndv of the PK - FK side don't match, and the PK side is a filter
+    * on the Key column then scale the NDV on the FK side.
+    *
+    * As described by Peter Boncz: http://databasearchitects.blogspot.com/
+    * in such cases we can be off by a large margin in the Join cardinality
+    * estimate. The e.g. he provides is on the join of StoreSales and DateDim
+    * on the TPCDS dataset. Since the DateDim is populated for 20 years into
+    * the future, while the StoreSales only has 5 years worth of data, there
+    * are 40 times fewer distinct dates in StoreSales.
+    *
+    * In general it is hard to infer the range for the foreign key on an
+    * arbitrary expression. For e.g. the NDV for DayofWeek is the same
+    * irrespective of NDV on the number of unique days, whereas the
+    * NDV of Quarters has the same ratio as the NDV on the keys.
+    *
+    * But for expressions that apply only on columns that have the same NDV
+    * as the key (implying that they are alternate keys) we can apply the
+    * ratio. So in the case of StoreSales - DateDim joins for predicate on the
+    * d_date column we can apply the scaling factor.
+    */
+   double ndvScalingFactor = 1.0;
+   if ( isPKSideSimpleTree ) {
+     ndvScalingFactor = pkSide == 0 ? leftNDV/rightNDV : rightNDV / leftNDV;
+   }
+
+    if (pkSide == 0) {
+      FKSideInfo fkInfo = new FKSideInfo(rightRowCount,
+          rightNDV);
+      PKSideInfo pkInfo = new PKSideInfo(leftRowCount,
+          leftNDV,
+          joinRel.getJoinType().generatesNullsOnRight() ? 1.0 :
+            RelMetadataQuery.getSelectivity(left, leftPred));
+
+      return new PKFKRelationInfo(1, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree);
+    }
+
+    if (pkSide == 1) {
+      FKSideInfo fkInfo = new FKSideInfo(leftRowCount,
+          leftNDV);
+      PKSideInfo pkInfo = new PKSideInfo(rightRowCount,
+          rightNDV,
+          joinRel.getJoinType().generatesNullsOnLeft() ? 1.0 :
+            RelMetadataQuery.getSelectivity(right, rightPred));
+
+      return new PKFKRelationInfo(1, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree);
+    }
+
+    return null;
+  }
+
+  private static boolean isKey(BitSet c, RelNode rel) {
+    boolean isKey = false;
+    Set<BitSet> keys = RelMetadataQuery.getUniqueKeys(rel);
+    if (keys != null) {
+      for (BitSet key : keys) {
+        if (key.equals(c)) {
+          isKey = true;
+          break;
+        }
+      }
+    }
+    return isKey;
+  }
+
+  /*
+   * 1. Join condition must be an Equality Predicate.
+   * 2. both sides must reference 1 column.
+   * 3. If needed flip the columns.
+   */
+  private static Pair<Integer, Integer> canHandleJoin(JoinRelBase joinRel,
+      List<RexNode> leftFilters, List<RexNode> rightFilters,
+      List<RexNode> joinFilters) {
+
+    /*
+     * If after classifying filters there is more than 1 joining predicate, we
+     * don't handle this. Return null.
+     */
+    if (joinFilters.size() != 1) {
+      return null;
+    }
+
+    RexNode joinCond = joinFilters.get(0);
+
+    int leftColIdx;
+    int rightColIdx;
+
+    if (!(joinCond instanceof RexCall)) {
+      return null;
+    }
+
+    if (((RexCall) joinCond).getOperator() != SqlStdOperatorTable.EQUALS) {
+      return null;
+    }
+
+    BitSet leftCols = RelOptUtil.InputFinder.bits(((RexCall) joinCond).getOperands().get(0));
+    BitSet rightCols = RelOptUtil.InputFinder.bits(((RexCall) joinCond).getOperands().get(1));
+
+    if (leftCols.cardinality() != 1 || rightCols.cardinality() != 1 ) {
+      return null;
+    }
+
+    int nFieldsLeft = joinRel.getLeft().getRowType().getFieldList().size();
+    int nFieldsRight = joinRel.getRight().getRowType().getFieldList().size();
+    int nSysFields = joinRel.getSystemFieldList().size();
+    BitSet rightFieldsBitSet = BitSets.range(nSysFields + nFieldsLeft,
+        nSysFields + nFieldsLeft + nFieldsRight);
+    /*
+     * flip column references if join condition specified in reverse order to
+     * join sources.
+     */
+    if (BitSets.contains(rightFieldsBitSet, leftCols)) {
+      BitSet t = leftCols;
+      leftCols = rightCols;
+      rightCols = t;
+    }
+
+    leftColIdx = leftCols.nextSetBit(0) - nSysFields;
+    rightColIdx = rightCols.nextSetBit(0) - (nSysFields + nFieldsLeft);
+
+    return new Pair<Integer, Integer>(leftColIdx, rightColIdx);
+  }
+
+  private static class IsSimpleTreeOnJoinKey extends RelVisitor {
+
+    int joinKey;
+    boolean simpleTree;
+
+    static boolean check(RelNode r, int joinKey) {
+      IsSimpleTreeOnJoinKey v = new IsSimpleTreeOnJoinKey(joinKey);
+      v.go(r);
+      return v.simpleTree;
+    }
+
+    IsSimpleTreeOnJoinKey(int joinKey) {
+      super();
+      this.joinKey = joinKey;
+      simpleTree = true;
+    }
+
+    @Override
+    public void visit(RelNode node, int ordinal, RelNode parent) {
+
+      if (node instanceof TableAccessRelBase) {
+        simpleTree = true;
+      } else if (node instanceof ProjectRelBase) {
+        simpleTree = isSimple((ProjectRelBase) node);
+      } else if (node instanceof FilterRelBase) {
+        simpleTree = isSimple((FilterRelBase) node);
+      } else {
+        simpleTree = false;
+      }
+
+      if (simpleTree) {
+        super.visit(node, ordinal, parent);
+      }
+    }
+
+    private boolean isSimple(ProjectRelBase project) {
+      RexNode r = project.getProjects().get(joinKey);
+      if (r instanceof RexInputRef) {
+        joinKey = ((RexInputRef) r).getIndex();
+        return true;
+      }
+      return false;
+    }
+
+    private boolean isSimple(FilterRelBase filter) {
+      BitSet condBits = RelOptUtil.InputFinder.bits(filter.getCondition());
+      return isKey(condBits, filter);
+    }
+
+  }
+
+}

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdSelectivity.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdSelectivity.java?rev=1622819&r1=1622818&r2=1622819&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdSelectivity.java
(original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdSelectivity.java
Fri Sep  5 23:45:05 2014
@@ -1,6 +1,7 @@
 package org.apache.hadoop.hive.ql.optimizer.optiq.stats;
 
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -77,59 +78,84 @@ public class HiveRelMdSelectivity extend
     List<JoinLeafPredicateInfo> peLst = jpi.getEquiJoinPredicateElements();
     int noOfPE = peLst.size();
     if (noOfPE > 0) {
-      // 3.1 Use first conjunctive predicate element's NDV as the seed
-      ndvCrossProduct = getMaxNDVForJoinSelectivity(peLst.get(0), colStatMap);
+      ndvCrossProduct = exponentialBackoff(peLst, colStatMap);
 
-      // 3.2 if conjunctive predicate elements are more than one, then walk
-      // through them one by one. Compute cross product of NDV. Cross product is
-      // computed by multiplying the largest NDV of all of the conjunctive
-      // predicate
-      // elements with degraded NDV of rest of the conjunctive predicate
-      // elements. NDV is
-      // degraded using log function.Finally the ndvCrossProduct is fenced at
-      // the join
-      // cross product to ensure that NDV can not exceed worst case join
-      // cardinality.<br>
-      // NDV of a conjunctive predicate element is the max NDV of all arguments
-      // to lhs, rhs expressions.
-      // NDV(JoinCondition) = min (left cardinality * right cardinality,
-      // ndvCrossProduct(JoinCondition))
-      // ndvCrossProduct(JoinCondition) = ndv(pex)*log(ndv(pe1))*log(ndv(pe2))
-      // where pex is the predicate element of join condition with max ndv.
-      // ndv(pe) = max(NDV(left.Expr), NDV(right.Expr))
-      // NDV(expr) = max(NDV( expr args))
-      if (noOfPE > 1) {
-        double maxNDVSoFar = ndvCrossProduct;
-        double ndvToBeSmoothed;
-        double tmpNDV;
-
-        for (int i = 1; i < noOfPE; i++) {
-          tmpNDV = getMaxNDVForJoinSelectivity(peLst.get(i), colStatMap);
-          if (tmpNDV > maxNDVSoFar) {
-            ndvToBeSmoothed = maxNDVSoFar;
-            maxNDVSoFar = tmpNDV;
-            ndvCrossProduct = (ndvCrossProduct / ndvToBeSmoothed) * tmpNDV;
-          } else {
-            ndvToBeSmoothed = tmpNDV;
-          }
-          // TODO: revisit the fence
-          if (ndvToBeSmoothed > 3)
-            ndvCrossProduct *= Math.log(ndvToBeSmoothed);
-          else
-            ndvCrossProduct *= ndvToBeSmoothed;
-        }
+      if (j.isLeftSemiJoin())
+        ndvCrossProduct = Math.min(RelMetadataQuery.getRowCount(j.getLeft()),
+            ndvCrossProduct);
+      else
+        ndvCrossProduct = Math.min(RelMetadataQuery.getRowCount(j.getLeft())
+            * RelMetadataQuery.getRowCount(j.getRight()), ndvCrossProduct);
+    }
+
+    // 4. Join Selectivity = 1/NDV
+    return (1 / ndvCrossProduct);
+  }
 
-        if (j.isLeftSemiJoin())
-          ndvCrossProduct = Math.min(RelMetadataQuery.getRowCount(j.getLeft()),
-              ndvCrossProduct);
+  // 3.2 if conjunctive predicate elements are more than one, then walk
+  // through them one by one. Compute cross product of NDV. Cross product is
+  // computed by multiplying the largest NDV of all of the conjunctive
+  // predicate
+  // elements with degraded NDV of rest of the conjunctive predicate
+  // elements. NDV is
+  // degraded using log function.Finally the ndvCrossProduct is fenced at
+  // the join
+  // cross product to ensure that NDV can not exceed worst case join
+  // cardinality.<br>
+  // NDV of a conjunctive predicate element is the max NDV of all arguments
+  // to lhs, rhs expressions.
+  // NDV(JoinCondition) = min (left cardinality * right cardinality,
+  // ndvCrossProduct(JoinCondition))
+  // ndvCrossProduct(JoinCondition) = ndv(pex)*log(ndv(pe1))*log(ndv(pe2))
+  // where pex is the predicate element of join condition with max ndv.
+  // ndv(pe) = max(NDV(left.Expr), NDV(right.Expr))
+  // NDV(expr) = max(NDV( expr args))
+  protected double logSmoothing(List<JoinLeafPredicateInfo> peLst, ImmutableMap<Integer,
Double> colStatMap) {
+    int noOfPE = peLst.size();
+    double ndvCrossProduct = getMaxNDVForJoinSelectivity(peLst.get(0), colStatMap);
+    if (noOfPE > 1) {
+      double maxNDVSoFar = ndvCrossProduct;
+      double ndvToBeSmoothed;
+      double tmpNDV;
+
+      for (int i = 1; i < noOfPE; i++) {
+        tmpNDV = getMaxNDVForJoinSelectivity(peLst.get(i), colStatMap);
+        if (tmpNDV > maxNDVSoFar) {
+          ndvToBeSmoothed = maxNDVSoFar;
+          maxNDVSoFar = tmpNDV;
+          ndvCrossProduct = (ndvCrossProduct / ndvToBeSmoothed) * tmpNDV;
+        } else {
+          ndvToBeSmoothed = tmpNDV;
+        }
+        // TODO: revisit the fence
+        if (ndvToBeSmoothed > 3)
+          ndvCrossProduct *= Math.log(ndvToBeSmoothed);
         else
-          ndvCrossProduct = Math.min(RelMetadataQuery.getRowCount(j.getLeft())
-              * RelMetadataQuery.getRowCount(j.getRight()), ndvCrossProduct);
+          ndvCrossProduct *= ndvToBeSmoothed;
       }
     }
+    return ndvCrossProduct;
+  }
 
-    // 4. Join Selectivity = 1/NDV
-    return (1 / ndvCrossProduct);
+  /*
+   * a) Order predciates based on ndv in reverse order. b) ndvCrossProduct =
+   * ndv(pe0) * ndv(pe1) ^(1/2) * ndv(pe2) ^(1/4) * ndv(pe3) ^(1/8) ...
+   */
+  protected double exponentialBackoff(List<JoinLeafPredicateInfo> peLst,
+      ImmutableMap<Integer, Double> colStatMap) {
+    int noOfPE = peLst.size();
+    List<Double> ndvs = new ArrayList<Double>(noOfPE);
+    for (int i = 0; i < noOfPE; i++) {
+      ndvs.add(getMaxNDVForJoinSelectivity(peLst.get(i), colStatMap));
+    }
+    Collections.sort(ndvs);
+    Collections.reverse(ndvs);
+    double ndvCrossProduct = 1.0;
+    for (int i = 0; i < ndvs.size(); i++) {
+      double n = Math.pow(ndvs.get(i), Math.pow(1 / 2.0, i));
+      ndvCrossProduct *= n;
+    }
+    return ndvCrossProduct;
   }
 
   private RexNode getCombinedPredicateForJoin(HiveJoinRel j, RexNode additionalPredicate)
{
@@ -181,4 +207,5 @@ public class HiveRelMdSelectivity extend
 
     return maxNDVSoFar;
   }
+  
 }

Added: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdUniqueKeys.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdUniqueKeys.java?rev=1622819&view=auto
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdUniqueKeys.java
(added)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdUniqueKeys.java
Fri Sep  5 23:45:05 2014
@@ -0,0 +1,115 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.optiq.stats;
+
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import net.hydromatic.optiq.BuiltinMethod;
+import net.hydromatic.optiq.util.BitSets;
+
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import org.eigenbase.rel.ProjectRelBase;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.metadata.BuiltInMetadata;
+import org.eigenbase.rel.metadata.Metadata;
+import org.eigenbase.rel.metadata.ReflectiveRelMetadataProvider;
+import org.eigenbase.rel.metadata.RelMdUniqueKeys;
+import org.eigenbase.rel.metadata.RelMetadataProvider;
+import org.eigenbase.rex.RexInputRef;
+import org.eigenbase.rex.RexNode;
+
+import com.google.common.base.Function;
+
+public class HiveRelMdUniqueKeys {
+
+  public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider
+      .reflectiveSource(BuiltinMethod.UNIQUE_KEYS.method,
+          new HiveRelMdUniqueKeys());
+
+  /*
+   * Infer Uniquenes if: - rowCount(col) = ndv(col) - TBD for numerics: max(col)
+   * - min(col) = rowCount(col)
+   * 
+   * Why are we intercepting ProjectRelbase and not TableScan? Because if we
+   * have a method for TableScan, it will not know which columns to check for.
+   * Inferring Uniqueness for all columns is very expensive right now. The flip
+   * side of doing this is, it only works post Field Trimming.
+   */
+  public Set<BitSet> getUniqueKeys(ProjectRelBase rel, boolean ignoreNulls) {
+
+    RelNode child = rel.getChild();
+
+    if (!(child instanceof HiveTableScanRel)) {
+      Function<RelNode, Metadata> fn = RelMdUniqueKeys.SOURCE.apply(
+          rel.getClass(), BuiltInMetadata.UniqueKeys.class);
+      return ((BuiltInMetadata.UniqueKeys) fn.apply(rel))
+          .getUniqueKeys(ignoreNulls);
+    }
+
+    HiveTableScanRel tScan = (HiveTableScanRel) child;
+    Map<Integer, Integer> posMap = new HashMap<Integer, Integer>();
+    int projectPos = 0;
+    int colStatsPos = 0;
+
+    BitSet projectedCols = new BitSet();
+    for (RexNode r : rel.getProjects()) {
+      if (r instanceof RexInputRef) {
+        projectedCols.set(((RexInputRef) r).getIndex());
+        posMap.put(colStatsPos, projectPos);
+        colStatsPos++;
+      }
+      projectPos++;
+    }
+
+    double numRows = tScan.getRows();
+    List<ColStatistics> colStats = tScan.getColStat(BitSets
+        .toList(projectedCols));
+    Set<BitSet> keys = new HashSet<BitSet>();
+
+    colStatsPos = 0;
+    for (ColStatistics cStat : colStats) {
+      boolean isKey = false;
+      if (cStat.getCountDistint() >= numRows) {
+        isKey = true;
+      }
+      if ( !isKey && cStat.getRange() != null &&
+          cStat.getRange().maxValue != null  &&
+          cStat.getRange().minValue != null) {
+        double r = cStat.getRange().maxValue.doubleValue() - 
+            cStat.getRange().minValue.doubleValue() + 1;
+        isKey = (numRows == r);
+      }
+      if ( isKey ) {
+        BitSet key = new BitSet();
+        key.set(posMap.get(colStatsPos));
+        keys.add(key);
+      }
+      colStatsPos++;
+    }
+
+    return keys;
+  }
+
+}

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1622819&r1=1622818&r2=1622819&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Fri
Sep  5 23:45:05 2014
@@ -11946,9 +11946,9 @@ public class SemanticAnalyzer extends Ba
       if (LOG.isDebugEnabled() && !conf.getBoolVar(ConfVars.HIVE_IN_TEST)) {
         LOG.debug("CBO Planning details:\n");
         LOG.debug("Original Plan:\n");
-        LOG.debug(RelOptUtil.toString(optiqGenPlan, SqlExplainLevel.ALL_ATTRIBUTES));
+        LOG.debug(RelOptUtil.toString(optiqGenPlan));
         LOG.debug("Plan After PPD, PartPruning, ColumnPruning:\n");
-        LOG.debug(RelOptUtil.toString(optiqPreCboPlan, SqlExplainLevel.ALL_ATTRIBUTES));
+        LOG.debug(RelOptUtil.toString(optiqPreCboPlan));
         LOG.debug("Plan After Join Reordering:\n");
         LOG.debug(RelOptUtil.toString(optiqOptimizedPlan, SqlExplainLevel.ALL_ATTRIBUTES));
       }



Mime
View raw message