hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jpull...@apache.org
Subject svn commit: r1644224 [2/5] - in /hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/ ql/src/java/org...
Date Tue, 09 Dec 2014 23:02:24 GMT
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java Tue Dec  9 23:02:22 2014
@@ -0,0 +1,204 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.RelFactories.ProjectFactory;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.util.Util;
+import org.apache.calcite.util.mapping.Mapping;
+import org.apache.calcite.util.mapping.MappingType;
+import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
+
+import com.google.common.collect.ImmutableList;
+
+public class HiveProject extends Project implements HiveRelNode {
+
+  public static final ProjectFactory DEFAULT_PROJECT_FACTORY = new HiveProjectFactoryImpl();
+
+  private final List<Integer>        virtualCols;
+
+  /**
+   * Creates a HiveProject.
+   *
+   * @param cluster
+   *          Cluster this relational expression belongs to
+   * @param child
+   *          input relational expression
+   * @param exps
+   *          List of expressions for the input columns
+   * @param rowType
+   *          output row type
+   * @param flags
+   *          values as in {@link Project.Flags}
+   */
+  public HiveProject(RelOptCluster cluster, RelTraitSet traitSet, RelNode child,
+      List<? extends RexNode> exps, RelDataType rowType, int flags) {
+    super(cluster, traitSet, child, exps, rowType, flags);
+    virtualCols = ImmutableList.copyOf(HiveCalciteUtil.getVirtualCols(exps));
+  }
+
+  /**
+   * Creates a HiveProject with no sort keys.
+   *
+   * @param child
+   *          input relational expression
+   * @param exps
+   *          set of expressions for the input columns
+   * @param fieldNames
+   *          aliases of the expressions
+   */
+  public static HiveProject create(RelNode child, List<? extends RexNode> exps,
+    List<String> fieldNames) throws CalciteSemanticException{
+    RelOptCluster cluster = child.getCluster();
+
+    // 1 Ensure columnNames are unique - CALCITE-411
+    if (fieldNames != null && !Util.isDistinct(fieldNames)) {
+      String msg = "Select list contains multiple expressions with the same name." + fieldNames;
+      throw new CalciteSemanticException(msg);
+    }
+    RelDataType rowType = RexUtil.createStructType(cluster.getTypeFactory(), exps, fieldNames);
+    return create(cluster, child, exps, rowType, Collections.<RelCollation> emptyList());
+  }
+
+  /**
+   * Creates a HiveProject.
+   */
+  public static HiveProject create(RelOptCluster cluster, RelNode child, List<? extends RexNode> exps,
+      RelDataType rowType, final List<RelCollation> collationList) {
+    RelTraitSet traitSet = TraitsUtil.getDefaultTraitSet(cluster);
+    return new HiveProject(cluster, traitSet, child, exps, rowType, Flags.BOXED);
+  }
+
+  /**
+   * Creates a HiveProject.
+   */
+  public static HiveProject create(RelOptCluster cluster, RelNode child, List<? extends RexNode> exps,
+      RelDataType rowType, RelTraitSet traitSet, final List<RelCollation> collationList) {
+    return new HiveProject(cluster, traitSet, child, exps, rowType, Flags.BOXED);
+  }
+
+  /**
+   * Creates a relational expression which projects the output fields of a
+   * relational expression according to a partial mapping.
+   *
+   * <p>
+   * A partial mapping is weaker than a permutation: every target has one
+   * source, but a source may have 0, 1 or more than one targets. Usually the
+   * result will have fewer fields than the source, unless some source fields
+   * are projected multiple times.
+   *
+   * <p>
+   * This method could optimize the result as {@link #permute} does, but does
+   * not at present.
+   *
+   * @param rel
+   *          Relational expression
+   * @param mapping
+   *          Mapping from source fields to target fields. The mapping type must
+   *          obey the constraints {@link MappingType#isMandatorySource()} and
+   *          {@link MappingType#isSingleSource()}, as does
+   *          {@link MappingType#INVERSE_FUNCTION}.
+   * @param fieldNames
+   *          Field names; if null, or if a particular entry is null, the name
+   *          of the permuted field is used
+   * @return relational expression which projects a subset of the input fields
+   * @throws CalciteSemanticException
+   */
+  public static RelNode projectMapping(RelNode rel, Mapping mapping, List<String> fieldNames) throws CalciteSemanticException {
+    assert mapping.getMappingType().isSingleSource();
+    assert mapping.getMappingType().isMandatorySource();
+
+    if (mapping.isIdentity()) {
+      return rel;
+    }
+
+    final List<String> outputNameList = new ArrayList<String>();
+    final List<RexNode> outputProjList = new ArrayList<RexNode>();
+    final List<RelDataTypeField> fields = rel.getRowType().getFieldList();
+    final RexBuilder rexBuilder = rel.getCluster().getRexBuilder();
+
+    for (int i = 0; i < mapping.getTargetCount(); i++) {
+      int source = mapping.getSource(i);
+      final RelDataTypeField sourceField = fields.get(source);
+      outputNameList
+          .add(((fieldNames == null) || (fieldNames.size() <= i) || (fieldNames.get(i) == null)) ? sourceField
+              .getName() : fieldNames.get(i));
+      outputProjList.add(rexBuilder.makeInputRef(rel, source));
+    }
+
+    return create(rel, outputProjList, outputNameList);
+  }
+
+  @Override
+  public Project copy(RelTraitSet traitSet, RelNode input, List<RexNode> exps,
+      RelDataType rowType) {
+    assert traitSet.containsIfApplicable(HiveRelNode.CONVENTION);
+    return new HiveProject(getCluster(), traitSet, input, exps, rowType, getFlags());
+  }
+
+  @Override
+  public RelOptCost computeSelfCost(RelOptPlanner planner) {
+    return HiveCost.FACTORY.makeZeroCost();
+  }
+
+  @Override
+  public void implement(Implementor implementor) {
+  }
+
+  public List<Integer> getVirtualCols() {
+    return virtualCols;
+  }
+
+  /**
+   * Implementation of {@link ProjectFactory} that returns
+   * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject}
+   * .
+   */
+  private static class HiveProjectFactoryImpl implements ProjectFactory {
+
+    @Override
+    public RelNode createProject(RelNode child,
+        List<? extends RexNode> childExprs, List<String> fieldNames) {
+      RelOptCluster cluster = child.getCluster();
+      RelDataType rowType = RexUtil.createStructType(cluster.getTypeFactory(), childExprs, fieldNames);
+      RelNode project = HiveProject.create(cluster, child,
+          childExprs, rowType,
+          child.getTraitSet(), Collections.<RelCollation> emptyList());
+
+      return project;
+    }
+  }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveRelNode.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveRelNode.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveRelNode.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveRelNode.java Tue Dec  9 23:02:22 2014
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import org.apache.calcite.plan.Convention;
+import org.apache.calcite.rel.RelNode;
+
+public interface HiveRelNode extends RelNode {
+  void implement(Implementor implementor);
+
+  /** Calling convention for relational operations that occur in Hive. */
+  final Convention CONVENTION = new Convention.Impl("HIVE", HiveRelNode.class);
+
+  class Implementor {
+
+    public void visitChild(int ordinal, RelNode input) {
+      assert ordinal == 0;
+      ((HiveRelNode) input).implement(this);
+    }
+  }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java Tue Dec  9 23:02:22 2014
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import java.util.Map;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.RelFactories;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rex.RexNode;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
+
+import com.google.common.collect.ImmutableMap;
+
+public class HiveSort extends Sort implements HiveRelNode {
+
+  public static final HiveSortRelFactory HIVE_SORT_REL_FACTORY = new HiveSortRelFactory();
+
+  // NOTE: this is to work around Hive Calcite Limitations w.r.t OB.
+  // 1. Calcite can not accept expressions in OB; instead it needs to be expressed
+  // as VC in input Select.
+  // 2. Hive can not preserve ordering through select boundaries.
+  // 3. This map is used for outermost OB to migrate the VC corresponding OB
+  // expressions from input select.
+  // 4. This is used by ASTConverter after we are done with Calcite Planning
+  private ImmutableMap<Integer, RexNode> mapOfInputRefToRexCall;
+
+  public HiveSort(RelOptCluster cluster, RelTraitSet traitSet, RelNode child,
+      RelCollation collation, RexNode offset, RexNode fetch) {
+    super(cluster, TraitsUtil.getSortTraitSet(cluster, traitSet, collation), child, collation,
+        offset, fetch);
+  }
+
+  @Override
+  public HiveSort copy(RelTraitSet traitSet, RelNode newInput, RelCollation newCollation,
+      RexNode offset, RexNode fetch) {
+    // TODO: can we blindly copy sort trait? What if inputs changed and we
+    // are now sorting by different cols
+    RelCollation canonizedCollation = traitSet.canonize(newCollation);
+    return new HiveSort(getCluster(), traitSet, newInput, canonizedCollation, offset, fetch);
+  }
+
+  public RexNode getFetchExpr() {
+    return fetch;
+  }
+
+  public void setInputRefToCallMap(ImmutableMap<Integer, RexNode> refToCall) {
+    this.mapOfInputRefToRexCall = refToCall;
+  }
+
+  public Map<Integer, RexNode> getInputRefToCallMap() {
+    return this.mapOfInputRefToRexCall;
+  }
+
+  @Override
+  public void implement(Implementor implementor) {
+  }
+
+  private static class HiveSortRelFactory implements RelFactories.SortFactory {
+
+    @Override
+    public RelNode createSort(RelTraitSet traits, RelNode child, RelCollation collation,
+        RexNode offset, RexNode fetch) {
+      return new HiveSort(child.getCluster(), traits, child, collation, offset, fetch);
+    }
+  }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java Tue Dec  9 23:02:22 2014
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+
+
+/**
+ * Relational expression representing a scan of a HiveDB collection.
+ *
+ * <p>
+ * Additional operations might be applied, using the "find" or "aggregate"
+ * methods.
+ * </p>
+ */
+public class HiveTableScan extends TableScan implements HiveRelNode {
+
+  /**
+   * Creates a HiveTableScan.
+   *
+   * @param cluster
+   *          Cluster
+   * @param traitSet
+   *          Traits
+   * @param table
+   *          Table
+   * @param table
+   *          HiveDB table
+   */
+  public HiveTableScan(RelOptCluster cluster, RelTraitSet traitSet, RelOptHiveTable table,
+      RelDataType rowtype) {
+    super(cluster, TraitsUtil.getDefaultTraitSet(cluster), table);
+    assert getConvention() == HiveRelNode.CONVENTION;
+  }
+
+  @Override
+  public RelNode copy(RelTraitSet traitSet, List<RelNode> inputs) {
+    assert inputs.isEmpty();
+    return this;
+  }
+
+  @Override
+  public RelOptCost computeSelfCost(RelOptPlanner planner) {
+    return HiveCost.FACTORY.makeZeroCost();
+  }
+
+  @Override
+  public void register(RelOptPlanner planner) {
+
+  }
+
+  @Override
+  public void implement(Implementor implementor) {
+
+  }
+
+  @Override
+  public double getRows() {
+    return ((RelOptHiveTable) table).getRowCount();
+  }
+
+  public List<ColStatistics> getColStat(List<Integer> projIndxLst) {
+    return ((RelOptHiveTable) table).getColStat(projIndxLst);
+  }
+}
\ No newline at end of file

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java Tue Dec  9 23:02:22 2014
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.RelFactories;
+import org.apache.calcite.rel.core.SetOp;
+import org.apache.calcite.rel.core.Union;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode.Implementor;
+
+public class HiveUnion extends Union {
+
+  public static final HiveUnionRelFactory UNION_REL_FACTORY = new HiveUnionRelFactory();
+
+  public HiveUnion(RelOptCluster cluster, RelTraitSet traits, List<RelNode> inputs) {
+    super(cluster, traits, inputs, true);
+  }
+
+  @Override
+  public SetOp copy(RelTraitSet traitSet, List<RelNode> inputs, boolean all) {
+    return new HiveUnion(this.getCluster(), traitSet, inputs);
+  }
+
+  public void implement(Implementor implementor) {
+  }
+
+  private static class HiveUnionRelFactory implements RelFactories.SetOpFactory {
+
+    @Override
+    public RelNode createSetOp(SqlKind kind, List<RelNode> inputs, boolean all) {
+      if (kind != SqlKind.UNION) {
+        throw new IllegalStateException("Expected to get Set operator of type Union. Found : " + kind);
+      }
+      return new HiveUnion(inputs.get(0).getCluster(), inputs.get(0).getTraitSet(), inputs);
+    }
+  }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java Tue Dec  9 23:02:22 2014
@@ -0,0 +1,160 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.util.BitSet;
+import java.util.List;
+import java.util.ListIterator;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptRuleOperand;
+import org.apache.calcite.plan.RelOptUtil.InputFinder;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.RelFactories;
+import org.apache.calcite.rel.rules.FilterJoinRule;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+
+public abstract class HiveFilterJoinRule extends FilterJoinRule {
+
+  public static final HiveFilterJoinRule FILTER_ON_JOIN = new HiveFilterJoinMergeRule();
+
+  public static final HiveFilterJoinRule JOIN           = new HiveFilterJoinTransposeRule();
+
+  /**
+   * Creates a PushFilterPastJoinRule with an explicit root operand.
+   */
+  protected HiveFilterJoinRule(RelOptRuleOperand operand, String id, boolean smart,
+      RelFactories.FilterFactory filterFactory, RelFactories.ProjectFactory projectFactory) {
+    super(operand, id, smart, filterFactory, projectFactory);
+  }
+
+  /**
+   * Rule that tries to push filter expressions into a join condition and into
+   * the inputs of the join.
+   */
+  public static class HiveFilterJoinMergeRule extends HiveFilterJoinRule {
+    public HiveFilterJoinMergeRule() {
+      super(RelOptRule.operand(Filter.class,
+          RelOptRule.operand(Join.class, RelOptRule.any())),
+          "HiveFilterJoinRule:filter", true, HiveFilter.DEFAULT_FILTER_FACTORY,
+          HiveProject.DEFAULT_PROJECT_FACTORY);
+    }
+
+    @Override
+    public void onMatch(RelOptRuleCall call) {
+      Filter filter = call.rel(0);
+      Join join = call.rel(1);
+      super.perform(call, filter, join);
+    }
+  }
+
+  public static class HiveFilterJoinTransposeRule extends HiveFilterJoinRule {
+    public HiveFilterJoinTransposeRule() {
+      super(RelOptRule.operand(Join.class, RelOptRule.any()),
+          "HiveFilterJoinRule:no-filter", true, HiveFilter.DEFAULT_FILTER_FACTORY,
+          HiveProject.DEFAULT_PROJECT_FACTORY);
+    }
+
+    @Override
+    public void onMatch(RelOptRuleCall call) {
+      Join join = call.rel(0);
+      super.perform(call, null, join);
+    }
+  }
+
+  /*
+   * Any predicates pushed down to joinFilters that aren't equality conditions:
+   * put them back as aboveFilters because Hive doesn't support not equi join
+   * conditions.
+   */
+  @Override
+  protected void validateJoinFilters(List<RexNode> aboveFilters, List<RexNode> joinFilters,
+      Join join, JoinRelType joinType) {
+    if (joinType.equals(JoinRelType.INNER)) {
+      ListIterator<RexNode> filterIter = joinFilters.listIterator();
+      while (filterIter.hasNext()) {
+        RexNode exp = filterIter.next();
+
+        if (exp instanceof RexCall) {
+          RexCall c = (RexCall) exp;
+          boolean validHiveJoinFilter = false;
+
+          if ((c.getOperator().getKind() == SqlKind.EQUALS)) {
+            validHiveJoinFilter = true;
+            for (RexNode rn : c.getOperands()) {
+              // NOTE: Hive dis-allows projections from both left & right side
+              // of join condition. Example: Hive disallows
+              // (r1.x +r2.x)=(r1.y+r2.y) on join condition.
+              if (filterRefersToBothSidesOfJoin(rn, join)) {
+                validHiveJoinFilter = false;
+                break;
+              }
+            }
+          } else if ((c.getOperator().getKind() == SqlKind.LESS_THAN)
+              || (c.getOperator().getKind() == SqlKind.GREATER_THAN)
+              || (c.getOperator().getKind() == SqlKind.LESS_THAN_OR_EQUAL)
+              || (c.getOperator().getKind() == SqlKind.GREATER_THAN_OR_EQUAL)) {
+            validHiveJoinFilter = true;
+            // NOTE: Hive dis-allows projections from both left & right side of
+            // join in in equality condition. Example: Hive disallows (r1.x <
+            // r2.x) on join condition.
+            if (filterRefersToBothSidesOfJoin(c, join)) {
+              validHiveJoinFilter = false;
+            }
+          }
+
+          if (validHiveJoinFilter)
+            continue;
+        }
+
+        aboveFilters.add(exp);
+        filterIter.remove();
+      }
+    }
+  }
+
+  private boolean filterRefersToBothSidesOfJoin(RexNode filter, Join j) {
+    boolean refersToBothSides = false;
+
+    int joinNoOfProjects = j.getRowType().getFieldCount();
+    ImmutableBitSet filterProjs = ImmutableBitSet.FROM_BIT_SET.apply(
+            new BitSet(joinNoOfProjects));
+    ImmutableBitSet allLeftProjs = filterProjs.union(
+            ImmutableBitSet.range(0, j.getInput(0).getRowType().getFieldCount()));
+    ImmutableBitSet allRightProjs = filterProjs.union(
+            ImmutableBitSet.range(j.getInput(0).getRowType().getFieldCount(), joinNoOfProjects));
+
+    filterProjs = filterProjs.union(InputFinder.bits(filter));
+
+    if (allLeftProjs.intersects(filterProjs) && allRightProjs.intersects(filterProjs))
+      refersToBothSides = true;
+
+    return refersToBothSides;
+  }
+}
+
+// End PushFilterPastJoinRule.java
+

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePartitionPruneRule.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePartitionPruneRule.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePartitionPruneRule.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePartitionPruneRule.java Tue Dec  9 23:02:22 2014
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.Pair;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+
+public class HivePartitionPruneRule extends RelOptRule {
+
+  HiveConf conf;
+
+  public HivePartitionPruneRule(HiveConf conf) {
+    super(operand(HiveFilter.class, operand(HiveTableScan.class, none())));
+    this.conf = conf;
+  }
+
+  @Override
+  public void onMatch(RelOptRuleCall call) {
+    HiveFilter filter = call.rel(0);
+    HiveTableScan tScan = call.rel(1);
+    perform(call, filter, tScan);
+  }
+
+  protected void perform(RelOptRuleCall call, Filter filter,
+      HiveTableScan tScan) {
+
+    RelOptHiveTable hiveTable = (RelOptHiveTable) tScan.getTable();
+    RexNode predicate = filter.getCondition();
+
+    Pair<RexNode, RexNode> predicates = PartitionPrune
+        .extractPartitionPredicates(filter.getCluster(), hiveTable, predicate);
+    RexNode partColExpr = predicates.left;
+    hiveTable.computePartitionList(conf, partColExpr);
+  }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectMergeRule.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectMergeRule.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectMergeRule.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectMergeRule.java Tue Dec  9 23:02:22 2014
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.rel.rules.ProjectMergeRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+
+//Currently not used, turn this on later
+public class HiveProjectMergeRule extends ProjectMergeRule {
+  public static final HiveProjectMergeRule INSTANCE = new HiveProjectMergeRule();
+
+  public HiveProjectMergeRule() {
+    super(true, HiveProject.DEFAULT_PROJECT_FACTORY);
+  }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/PartitionPrune.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/PartitionPrune.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/PartitionPrune.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/PartitionPrune.java Tue Dec  9 23:02:22 2014
@@ -0,0 +1,207 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexVisitorImpl;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.util.Pair;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+
+public class PartitionPrune {
+
+  /**
+   * Breaks the predicate into 2 pieces. The first piece is the expressions that
+   * only contain partition columns and can be used for Partition Pruning; the
+   * second piece is the predicates that are left.
+   * 
+   * @param cluster
+   * @param hiveTable
+   * @param predicate
+   * @return a Pair of expressions, each of which maybe null. The 1st predicate
+   *         is expressions that only contain partition columns; the 2nd
+   *         predicate contains the remaining predicates.
+   */
+  public static Pair<RexNode, RexNode> extractPartitionPredicates(
+      RelOptCluster cluster, RelOptHiveTable hiveTable, RexNode predicate) {
+    RexNode partitionPruningPred = predicate
+        .accept(new ExtractPartPruningPredicate(cluster, hiveTable));
+    RexNode remainingPred = predicate.accept(new ExtractRemainingPredicate(
+        cluster, partitionPruningPred));
+    return new Pair<RexNode, RexNode>(partitionPruningPred, remainingPred);
+  }
+
+  public static class ExtractPartPruningPredicate extends
+      RexVisitorImpl<RexNode> {
+
+    final RelOptHiveTable hiveTable;
+    final RelDataType rType;
+    final Set<String> partCols;
+    final RelOptCluster cluster;
+
+    public ExtractPartPruningPredicate(RelOptCluster cluster,
+        RelOptHiveTable hiveTable) {
+      super(true);
+      this.hiveTable = hiveTable;
+      rType = hiveTable.getRowType();
+      List<FieldSchema> pfs = hiveTable.getHiveTableMD().getPartCols();
+      partCols = new HashSet<String>();
+      for (FieldSchema pf : pfs) {
+        partCols.add(pf.getName());
+      }
+      this.cluster = cluster;
+    }
+
+    @Override
+    public RexNode visitLiteral(RexLiteral literal) {
+      return literal;
+    }
+
+    @Override
+    public RexNode visitInputRef(RexInputRef inputRef) {
+      RelDataTypeField f = rType.getFieldList().get(inputRef.getIndex());
+      if (partCols.contains(f.getName())) {
+        return inputRef;
+      } else {
+        return null;
+      }
+    }
+
+    @Override
+    public RexNode visitCall(RexCall call) {
+      if (!deep) {
+        return null;
+      }
+
+      List<RexNode> args = new LinkedList<RexNode>();
+      boolean argsPruned = false;
+
+      GenericUDF hiveUDF = SqlFunctionConverter.getHiveUDF(call.getOperator(),
+          call.getType(), call.operands.size());
+      if (hiveUDF != null &&
+          !FunctionRegistry.isDeterministic(hiveUDF)) {
+        return null;
+      }
+
+      for (RexNode operand : call.operands) {
+        RexNode n = operand.accept(this);
+        if (n != null) {
+          args.add(n);
+        } else {
+          argsPruned = true;
+        }
+      }
+
+      if (call.getOperator() != SqlStdOperatorTable.AND) {
+        return argsPruned ? null : call;
+      } else {
+        if (args.size() == 0) {
+          return null;
+        } else if (args.size() == 1) {
+          return args.get(0);
+        } else {
+          return cluster.getRexBuilder().makeCall(call.getOperator(), args);
+        }
+      }
+    }
+
+  }
+
+  public static class ExtractRemainingPredicate extends RexVisitorImpl<RexNode> {
+
+    List<RexNode> pruningPredicates;
+    final RelOptCluster cluster;
+
+    public ExtractRemainingPredicate(RelOptCluster cluster,
+        RexNode partPruningExpr) {
+      super(true);
+      this.cluster = cluster;
+      pruningPredicates = new ArrayList<RexNode>();
+      flattenPredicates(partPruningExpr);
+    }
+
+    private void flattenPredicates(RexNode r) {
+      if (r instanceof RexCall
+          && ((RexCall) r).getOperator() == SqlStdOperatorTable.AND) {
+        for (RexNode c : ((RexCall) r).getOperands()) {
+          flattenPredicates(c);
+        }
+      } else {
+        pruningPredicates.add(r);
+      }
+    }
+
+    @Override
+    public RexNode visitLiteral(RexLiteral literal) {
+      return literal;
+    }
+
+    @Override
+    public RexNode visitInputRef(RexInputRef inputRef) {
+      return inputRef;
+    }
+
+    @Override
+    public RexNode visitCall(RexCall call) {
+      if (!deep) {
+        return null;
+      }
+
+      if (call.getOperator() != SqlStdOperatorTable.AND) {
+        if (pruningPredicates.contains(call)) {
+          return null;
+        } else {
+          return call;
+        }
+      }
+
+      List<RexNode> args = new LinkedList<RexNode>();
+
+      for (RexNode operand : call.operands) {
+        RexNode n = operand.accept(this);
+        if (n != null) {
+          args.add(n);
+        }
+      }
+
+      if (args.size() == 0) {
+        return null;
+      } else if (args.size() == 1) {
+        return args.get(0);
+      } else {
+        return cluster.getRexBuilder().makeCall(call.getOperator(), args);
+      }
+    }
+  }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java Tue Dec  9 23:02:22 2014
@@ -0,0 +1,255 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.RelOptUtil.InputReferencedVisitor;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexVisitorImpl;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlOperator;
+import org.apache.calcite.sql.type.SqlTypeUtil;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+
+public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
+  private final RelNode childRel;
+  private final double  childCardinality;
+
+  protected FilterSelectivityEstimator(RelNode childRel) {
+    super(true);
+    this.childRel = childRel;
+    this.childCardinality = RelMetadataQuery.getRowCount(childRel);
+  }
+
+  public Double estimateSelectivity(RexNode predicate) {
+    return predicate.accept(this);
+  }
+
+  public Double visitCall(RexCall call) {
+    if (!deep) {
+      return 1.0;
+    }
+
+    /*
+     * Ignore any predicates on partition columns because we have already
+     * accounted for these in the Table row count.
+     */
+    if (isPartitionPredicate(call, this.childRel)) {
+      return 1.0;
+    }
+
+    Double selectivity = null;
+    SqlKind op = getOp(call);
+
+    switch (op) {
+    case AND: {
+      selectivity = computeConjunctionSelectivity(call);
+      break;
+    }
+
+    case OR: {
+      selectivity = computeDisjunctionSelectivity(call);
+      break;
+    }
+
+    case NOT:
+    case NOT_EQUALS: {
+      selectivity = computeNotEqualitySelectivity(call);
+      break;
+    }
+
+    case LESS_THAN_OR_EQUAL:
+    case GREATER_THAN_OR_EQUAL:
+    case LESS_THAN:
+    case GREATER_THAN: {
+      selectivity = ((double) 1 / (double) 3);
+      break;
+    }
+
+    case IN: {
+      // TODO: 1) check for duplicates 2) We assume in clause values to be
+      // present in NDV which may not be correct (Range check can find it) 3) We
+      // assume values in NDV set is uniformly distributed over col values
+      // (account for skewness - histogram).
+      selectivity = computeFunctionSelectivity(call) * (call.operands.size() - 1);
+      if (selectivity <= 0.0) {
+        selectivity = 0.10;
+      } else if (selectivity >= 1.0) {
+        selectivity = 1.0;
+      }
+      break;
+    }
+
+    default:
+      selectivity = computeFunctionSelectivity(call);
+    }
+
+    return selectivity;
+  }
+
+  /**
+   * NDV of "f1(x, y, z) != f2(p, q, r)" ->
+   * "(maxNDV(x,y,z,p,q,r) - 1)/maxNDV(x,y,z,p,q,r)".
+   * <p>
+   * 
+   * @param call
+   * @return
+   */
+  private Double computeNotEqualitySelectivity(RexCall call) {
+    double tmpNDV = getMaxNDV(call);
+
+    if (tmpNDV > 1)
+      return (tmpNDV - (double) 1) / tmpNDV;
+    else
+      return 1.0;
+  }
+
+  /**
+   * Selectivity of f(X,y,z) -> 1/maxNDV(x,y,z).
+   * <p>
+   * Note that >, >=, <, <=, = ... are considered generic functions and uses
+   * this method to find their selectivity.
+   * 
+   * @param call
+   * @return
+   */
+  private Double computeFunctionSelectivity(RexCall call) {
+    return 1 / getMaxNDV(call);
+  }
+
+  /**
+   * Disjunction Selectivity -> (1 D(1-m1/n)(1-m2/n)) where n is the total
+   * number of tuples from child and m1 and m2 is the expected number of tuples
+   * from each part of the disjunction predicate.
+   * <p>
+   * Note we compute m1. m2.. by applying selectivity of the disjunctive element
+   * on the cardinality from child.
+   * 
+   * @param call
+   * @return
+   */
+  private Double computeDisjunctionSelectivity(RexCall call) {
+    Double tmpCardinality;
+    Double tmpSelectivity;
+    double selectivity = 1;
+
+    for (RexNode dje : call.getOperands()) {
+      tmpSelectivity = dje.accept(this);
+      if (tmpSelectivity == null) {
+        tmpSelectivity = 0.99;
+      }
+      tmpCardinality = childCardinality * tmpSelectivity;
+
+      if (tmpCardinality > 1 && tmpCardinality < childCardinality) {
+        tmpSelectivity = (1 - tmpCardinality / childCardinality);
+      } else {
+        tmpSelectivity = 1.0;
+      }
+
+      selectivity *= tmpSelectivity;
+    }
+
+    if (selectivity < 0.0)
+      selectivity = 0.0;
+
+    return (1 - selectivity);
+  }
+
+  /**
+   * Selectivity of conjunctive predicate -> (selectivity of conjunctive
+   * element1) * (selectivity of conjunctive element2)...
+   * 
+   * @param call
+   * @return
+   */
+  private Double computeConjunctionSelectivity(RexCall call) {
+    Double tmpSelectivity;
+    double selectivity = 1;
+
+    for (RexNode cje : call.getOperands()) {
+      tmpSelectivity = cje.accept(this);
+      if (tmpSelectivity != null) {
+        selectivity *= tmpSelectivity;
+      }
+    }
+
+    return selectivity;
+  }
+
+  private Double getMaxNDV(RexCall call) {
+    double tmpNDV;
+    double maxNDV = 1.0;
+    InputReferencedVisitor irv;
+
+    for (RexNode op : call.getOperands()) {
+      if (op instanceof RexInputRef) {
+        tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel,
+            ((RexInputRef) op).getIndex());
+        if (tmpNDV > maxNDV)
+          maxNDV = tmpNDV;
+      } else {
+        irv = new InputReferencedVisitor();
+        irv.apply(op);
+        for (Integer childProjIndx : irv.inputPosReferenced) {
+          tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel, childProjIndx);
+          if (tmpNDV > maxNDV)
+            maxNDV = tmpNDV;
+        }
+      }
+    }
+
+    return maxNDV;
+  }
+
+  private boolean isPartitionPredicate(RexNode expr, RelNode r) {
+    if (r instanceof Project) {
+      expr = RelOptUtil.pushFilterPastProject(expr, (Project) r);
+      return isPartitionPredicate(expr, ((Project) r).getInput());
+    } else if (r instanceof Filter) {
+      return isPartitionPredicate(expr, ((Filter) r).getInput());
+    } else if (r instanceof HiveTableScan) {
+      RelOptHiveTable table = (RelOptHiveTable) ((HiveTableScan) r).getTable();
+      ImmutableBitSet cols = RelOptUtil.InputFinder.bits(expr);
+      return table.containsPartitionColumnsOnly(cols);
+    }
+    return false;
+  }
+
+  private SqlKind getOp(RexCall call) {
+    SqlKind op = call.getKind();
+
+    if (call.getKind().equals(SqlKind.OTHER_FUNCTION)
+        && SqlTypeUtil.inBooleanFamily(call.getType())) {
+      SqlOperator sqlOp = call.getOperator();
+      String opName = (sqlOp != null) ? sqlOp.getName() : "";
+      if (opName.equalsIgnoreCase("in")) {
+        op = SqlKind.IN;
+      }
+    }
+
+    return op;
+  }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java Tue Dec  9 23:02:22 2014
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
+import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMdDistinctRowCount;
+import org.apache.calcite.rel.metadata.RelMdUtil;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+
+import com.google.common.collect.ImmutableList;
+
+public class HiveRelMdDistinctRowCount extends RelMdDistinctRowCount {
+
+  private static final HiveRelMdDistinctRowCount INSTANCE =
+      new HiveRelMdDistinctRowCount();
+
+  public static final RelMetadataProvider SOURCE = ChainedRelMetadataProvider
+      .of(ImmutableList.of(
+
+      ReflectiveRelMetadataProvider.reflectiveSource(
+          BuiltInMethod.DISTINCT_ROW_COUNT.method, INSTANCE),
+
+      ReflectiveRelMetadataProvider.reflectiveSource(
+          BuiltInMethod.CUMULATIVE_COST.method, INSTANCE)));
+
+  private HiveRelMdDistinctRowCount() {
+  }
+
+  // Catch-all rule when none of the others apply.
+  @Override
+  public Double getDistinctRowCount(RelNode rel, ImmutableBitSet groupKey,
+      RexNode predicate) {
+    if (rel instanceof HiveTableScan) {
+      return getDistinctRowCount((HiveTableScan) rel, groupKey, predicate);
+    }
+    /*
+     * For now use Calcite' default formulas for propagating NDVs up the Query
+     * Tree.
+     */
+    return super.getDistinctRowCount(rel, groupKey, predicate);
+  }
+
+  private Double getDistinctRowCount(HiveTableScan htRel, ImmutableBitSet groupKey,
+      RexNode predicate) {
+    List<Integer> projIndxLst = HiveCalciteUtil
+        .translateBitSetToProjIndx(groupKey);
+    List<ColStatistics> colStats = htRel.getColStat(projIndxLst);
+    Double noDistinctRows = 1.0;
+    for (ColStatistics cStat : colStats) {
+      noDistinctRows *= cStat.getCountDistint();
+    }
+
+    return Math.min(noDistinctRows, htRel.getRows());
+  }
+
+  public static Double getDistinctRowCount(RelNode r, int indx) {
+    ImmutableBitSet bitSetOfRqdProj = ImmutableBitSet.of(indx);
+    return RelMetadataQuery.getDistinctRowCount(r, bitSetOfRqdProj, r
+        .getCluster().getRexBuilder().makeLiteral(true));
+  }
+
+  @Override
+  public Double getDistinctRowCount(Join rel, ImmutableBitSet groupKey,
+      RexNode predicate) {
+    if (rel instanceof HiveJoin) {
+      HiveJoin hjRel = (HiveJoin) rel;
+      //TODO: Improve this
+      if (hjRel.isLeftSemiJoin()) {
+        return RelMetadataQuery.getDistinctRowCount(hjRel.getLeft(), groupKey,
+            rel.getCluster().getRexBuilder().makeLiteral(true));
+      } else {
+        return RelMdUtil.getJoinDistinctRowCount(rel, rel.getJoinType(),
+            groupKey, predicate, true);
+      }
+    }
+
+    return RelMetadataQuery.getDistinctRowCount(rel, groupKey, predicate);
+  }
+
+  /*
+   * Favor Broad Plans over Deep Plans.
+   */
+  public RelOptCost getCumulativeCost(HiveJoin rel) {
+    RelOptCost cost = RelMetadataQuery.getNonCumulativeCost(rel);
+    List<RelNode> inputs = rel.getInputs();
+    RelOptCost maxICost = HiveCost.ZERO;
+    for (RelNode input : inputs) {
+      RelOptCost iCost = RelMetadataQuery.getCumulativeCost(input);
+      if (maxICost.isLt(iCost)) {
+        maxICost = iCost;
+      }
+    }
+    return cost.plus(maxICost);
+  }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java Tue Dec  9 23:02:22 2014
@@ -0,0 +1,439 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelVisitor;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.SemiJoin;
+import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMdRowCount;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.Pair;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+
+public class HiveRelMdRowCount extends RelMdRowCount {
+
+  protected static final Log LOG  = LogFactory.getLog(HiveRelMdRowCount.class.getName());
+
+
+  public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider
+      .reflectiveSource(BuiltInMethod.ROW_COUNT.method, new HiveRelMdRowCount());
+
+  protected HiveRelMdRowCount() {
+    super();
+  }
+
+  public Double getRowCount(Join join) {
+    PKFKRelationInfo pkfk = analyzeJoinForPKFK(join);
+    if (pkfk != null) {
+      double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor);
+      selectivity = Math.min(1.0, selectivity);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Identified Primary - Foreign Key relation:");
+        LOG.debug(RelOptUtil.toString(join));
+        LOG.debug(pkfk);
+      }
+      return pkfk.fkInfo.rowCount * selectivity;
+    }
+    return join.getRows();
+  }
+
+  public Double getRowCount(SemiJoin rel) {
+    PKFKRelationInfo pkfk = analyzeJoinForPKFK(rel);
+    if (pkfk != null) {
+      double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor);
+      selectivity = Math.min(1.0, selectivity);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Identified Primary - Foreign Key relation:");
+        LOG.debug(RelOptUtil.toString(rel));
+        LOG.debug(pkfk);
+      }
+      return pkfk.fkInfo.rowCount * selectivity;
+    }
+    return super.getRowCount(rel);
+  }
+
+  static class PKFKRelationInfo {
+    public final int fkSide;
+    public final double ndvScalingFactor;
+    public final FKSideInfo fkInfo;
+    public final PKSideInfo pkInfo;
+    public final boolean isPKSideSimple;
+
+    PKFKRelationInfo(int fkSide,
+        FKSideInfo fkInfo,
+        PKSideInfo pkInfo,
+        double ndvScalingFactor,
+        boolean isPKSideSimple) {
+      this.fkSide = fkSide;
+      this.fkInfo = fkInfo;
+      this.pkInfo = pkInfo;
+      this.ndvScalingFactor = ndvScalingFactor;
+      this.isPKSideSimple = isPKSideSimple;
+    }
+
+    public String toString() {
+      return String.format(
+          "Primary - Foreign Key join:\n\tfkSide = %d\n\tFKInfo:%s\n" +
+          "\tPKInfo:%s\n\tisPKSideSimple:%s\n\tNDV Scaling Factor:%.2f\n",
+          fkSide,
+          fkInfo,
+          pkInfo,
+          isPKSideSimple,
+          ndvScalingFactor);
+    }
+  }
+
+  static class FKSideInfo {
+    public final double rowCount;
+    public final double distinctCount;
+    public FKSideInfo(double rowCount, double distinctCount) {
+      this.rowCount = rowCount;
+      this.distinctCount = distinctCount;
+    }
+
+    public String toString() {
+      return String.format("FKInfo(rowCount=%.2f,ndv=%.2f)", rowCount, distinctCount);
+    }
+  }
+
+  static class PKSideInfo extends FKSideInfo {
+    public final double selectivity;
+    public PKSideInfo(double rowCount, double distinctCount, double selectivity) {
+      super(rowCount, distinctCount);
+      this.selectivity = selectivity;
+    }
+
+    public String toString() {
+      return String.format("PKInfo(rowCount=%.2f,ndv=%.2f,selectivity=%.2f)", rowCount, distinctCount,selectivity);
+    }
+  }
+
+  /*
+   * For T1 join T2 on T1.x = T2.y if we identify 'y' s a key of T2 then we can
+   * infer the join cardinality as: rowCount(T1) * selectivity(T2) i.e this is
+   * like a SemiJoin where the T1(Fact side/FK side) is filtered by a factor
+   * based on the Selectivity of the PK/Dim table side.
+   *
+   * 1. If both T1.x and T2.y are keys then use the larger one as the PK side.
+   * 2. In case of outer Joins: a) The FK side should be the Null Preserving
+   * side. It doesn't make sense to apply this heuristic in case of Dim loj Fact
+   * or Fact roj Dim b) The selectivity factor applied on the Fact Table should
+   * be 1.
+   */
+  public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel) {
+
+    RelNode left = joinRel.getInputs().get(0);
+    RelNode right = joinRel.getInputs().get(1);
+
+    final List<RexNode> initJoinFilters = RelOptUtil.conjunctions(joinRel
+        .getCondition());
+
+    /*
+     * No joining condition.
+     */
+    if (initJoinFilters.isEmpty()) {
+      return null;
+    }
+
+    List<RexNode> leftFilters = new ArrayList<RexNode>();
+    List<RexNode> rightFilters = new ArrayList<RexNode>();
+    List<RexNode> joinFilters = new ArrayList<RexNode>(initJoinFilters);
+
+    // @todo: remove this. 8/28/14 hb
+    // for now adding because RelOptUtil.classifyFilters has an assertion about
+    // column counts that is not true for semiJoins.
+    if (joinRel instanceof SemiJoin) {
+      return null;
+    }
+
+    RelOptUtil.classifyFilters(joinRel, joinFilters, joinRel.getJoinType(),
+        false, !joinRel.getJoinType().generatesNullsOnRight(), !joinRel
+            .getJoinType().generatesNullsOnLeft(), joinFilters, leftFilters,
+        rightFilters);
+
+    Pair<Integer, Integer> joinCols = canHandleJoin(joinRel, leftFilters,
+        rightFilters, joinFilters);
+    if (joinCols == null) {
+      return null;
+    }
+    int leftColIdx = joinCols.left;
+    int rightColIdx = joinCols.right;
+
+    RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder();
+    RexNode leftPred = RexUtil
+        .composeConjunction(rexBuilder, leftFilters, true);
+    RexNode rightPred = RexUtil.composeConjunction(rexBuilder, rightFilters,
+        true);
+    ImmutableBitSet lBitSet = ImmutableBitSet.of(leftColIdx);
+    ImmutableBitSet rBitSet = ImmutableBitSet.of(rightColIdx);
+
+    /*
+     * If the form is Dim loj F or Fact roj Dim or Dim semij Fact then return
+     * null.
+     */
+    boolean leftIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel
+        .getJoinType() == JoinRelType.RIGHT)
+        && !(joinRel instanceof SemiJoin) && isKey(lBitSet, left);
+    boolean rightIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel
+        .getJoinType() == JoinRelType.LEFT) && isKey(rBitSet, right);
+
+    if (!leftIsKey && !rightIsKey) {
+      return null;
+    }
+
+    double leftRowCount = RelMetadataQuery.getRowCount(left);
+    double rightRowCount = RelMetadataQuery.getRowCount(right);
+
+    if (leftIsKey && rightIsKey) {
+      if (rightRowCount < leftRowCount) {
+        leftIsKey = false;
+      }
+    }
+
+    int pkSide = leftIsKey ? 0 : rightIsKey ? 1 : -1;
+
+    boolean isPKSideSimpleTree = pkSide != -1 ? 
+        IsSimpleTreeOnJoinKey.check(
+            pkSide == 0 ? left : right,
+            pkSide == 0 ? leftColIdx : rightColIdx) : false;
+
+   double leftNDV = isPKSideSimpleTree ? RelMetadataQuery.getDistinctRowCount(left, lBitSet, leftPred) : -1;
+   double rightNDV = isPKSideSimpleTree ? RelMetadataQuery.getDistinctRowCount(right, rBitSet, rightPred) : -1;
+
+   /*
+    * If the ndv of the PK - FK side don't match, and the PK side is a filter
+    * on the Key column then scale the NDV on the FK side.
+    *
+    * As described by Peter Boncz: http://databasearchitects.blogspot.com/
+    * in such cases we can be off by a large margin in the Join cardinality
+    * estimate. The e.g. he provides is on the join of StoreSales and DateDim
+    * on the TPCDS dataset. Since the DateDim is populated for 20 years into
+    * the future, while the StoreSales only has 5 years worth of data, there
+    * are 40 times fewer distinct dates in StoreSales.
+    *
+    * In general it is hard to infer the range for the foreign key on an
+    * arbitrary expression. For e.g. the NDV for DayofWeek is the same
+    * irrespective of NDV on the number of unique days, whereas the
+    * NDV of Quarters has the same ratio as the NDV on the keys.
+    *
+    * But for expressions that apply only on columns that have the same NDV
+    * as the key (implying that they are alternate keys) we can apply the
+    * ratio. So in the case of StoreSales - DateDim joins for predicate on the
+    * d_date column we can apply the scaling factor.
+    */
+   double ndvScalingFactor = 1.0;
+   if ( isPKSideSimpleTree ) {
+     ndvScalingFactor = pkSide == 0 ? leftNDV/rightNDV : rightNDV / leftNDV;
+   }
+
+    if (pkSide == 0) {
+      FKSideInfo fkInfo = new FKSideInfo(rightRowCount,
+          rightNDV);
+      double pkSelectivity = pkSelectivity(joinRel, true, left, leftRowCount);
+      PKSideInfo pkInfo = new PKSideInfo(leftRowCount,
+          leftNDV,
+          joinRel.getJoinType().generatesNullsOnRight() ? 1.0 :
+            pkSelectivity);
+
+      return new PKFKRelationInfo(1, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree);
+    }
+
+    if (pkSide == 1) {
+      FKSideInfo fkInfo = new FKSideInfo(leftRowCount,
+          leftNDV);
+      double pkSelectivity = pkSelectivity(joinRel, false, right, rightRowCount);
+      PKSideInfo pkInfo = new PKSideInfo(rightRowCount,
+          rightNDV,
+          joinRel.getJoinType().generatesNullsOnLeft() ? 1.0 :
+            pkSelectivity);
+
+      return new PKFKRelationInfo(1, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree);
+    }
+
+    return null;
+  }
+
+  private static double pkSelectivity(Join joinRel, boolean leftChild,
+      RelNode child,
+      double childRowCount) {
+    if ((leftChild && joinRel.getJoinType().generatesNullsOnRight()) ||
+        (!leftChild && joinRel.getJoinType().generatesNullsOnLeft())) {
+      return 1.0;
+    } else {
+      HiveTableScan tScan = HiveRelMdUniqueKeys.getTableScan(child, true);
+      if (tScan != null) {
+        double tRowCount = RelMetadataQuery.getRowCount(tScan);
+        return childRowCount / tRowCount;
+      } else {
+        return 1.0;
+      }
+    }
+  }
+
+  private static boolean isKey(ImmutableBitSet c, RelNode rel) {
+    boolean isKey = false;
+    Set<ImmutableBitSet> keys = RelMetadataQuery.getUniqueKeys(rel);
+    if (keys != null) {
+      for (ImmutableBitSet key : keys) {
+        if (key.equals(c)) {
+          isKey = true;
+          break;
+        }
+      }
+    }
+    return isKey;
+  }
+
+  /*
+   * 1. Join condition must be an Equality Predicate.
+   * 2. both sides must reference 1 column.
+   * 3. If needed flip the columns.
+   */
+  private static Pair<Integer, Integer> canHandleJoin(Join joinRel,
+      List<RexNode> leftFilters, List<RexNode> rightFilters,
+      List<RexNode> joinFilters) {
+
+    /*
+     * If after classifying filters there is more than 1 joining predicate, we
+     * don't handle this. Return null.
+     */
+    if (joinFilters.size() != 1) {
+      return null;
+    }
+
+    RexNode joinCond = joinFilters.get(0);
+
+    int leftColIdx;
+    int rightColIdx;
+
+    if (!(joinCond instanceof RexCall)) {
+      return null;
+    }
+
+    if (((RexCall) joinCond).getOperator() != SqlStdOperatorTable.EQUALS) {
+      return null;
+    }
+
+    ImmutableBitSet leftCols = RelOptUtil.InputFinder.bits(((RexCall) joinCond).getOperands().get(0));
+    ImmutableBitSet rightCols = RelOptUtil.InputFinder.bits(((RexCall) joinCond).getOperands().get(1));
+
+    if (leftCols.cardinality() != 1 || rightCols.cardinality() != 1 ) {
+      return null;
+    }
+
+    int nFieldsLeft = joinRel.getLeft().getRowType().getFieldList().size();
+    int nFieldsRight = joinRel.getRight().getRowType().getFieldList().size();
+    int nSysFields = joinRel.getSystemFieldList().size();
+    ImmutableBitSet rightFieldsBitSet = ImmutableBitSet.range(nSysFields + nFieldsLeft,
+        nSysFields + nFieldsLeft + nFieldsRight);
+    /*
+     * flip column references if join condition specified in reverse order to
+     * join sources.
+     */
+    if (rightFieldsBitSet.contains(leftCols)) {
+      ImmutableBitSet t = leftCols;
+      leftCols = rightCols;
+      rightCols = t;
+    }
+
+    leftColIdx = leftCols.nextSetBit(0) - nSysFields;
+    rightColIdx = rightCols.nextSetBit(0) - (nSysFields + nFieldsLeft);
+
+    return new Pair<Integer, Integer>(leftColIdx, rightColIdx);
+  }
+
+  private static class IsSimpleTreeOnJoinKey extends RelVisitor {
+
+    int joinKey;
+    boolean simpleTree;
+
+    static boolean check(RelNode r, int joinKey) {
+      IsSimpleTreeOnJoinKey v = new IsSimpleTreeOnJoinKey(joinKey);
+      v.go(r);
+      return v.simpleTree;
+    }
+
+    IsSimpleTreeOnJoinKey(int joinKey) {
+      super();
+      this.joinKey = joinKey;
+      simpleTree = true;
+    }
+
+    @Override
+    public void visit(RelNode node, int ordinal, RelNode parent) {
+
+      if (node instanceof HepRelVertex) {
+        node = ((HepRelVertex) node).getCurrentRel();
+      }
+
+      if (node instanceof TableScan) {
+        simpleTree = true;
+      } else if (node instanceof Project) {
+        simpleTree = isSimple((Project) node);
+      } else if (node instanceof Filter) {
+        simpleTree = isSimple((Filter) node);
+      } else {
+        simpleTree = false;
+      }
+
+      if (simpleTree) {
+        super.visit(node, ordinal, parent);
+      }
+    }
+
+    private boolean isSimple(Project project) {
+      RexNode r = project.getProjects().get(joinKey);
+      if (r instanceof RexInputRef) {
+        joinKey = ((RexInputRef) r).getIndex();
+        return true;
+      }
+      return false;
+    }
+
+    private boolean isSimple(Filter filter) {
+      ImmutableBitSet condBits = RelOptUtil.InputFinder.bits(filter.getCondition());
+      return isKey(condBits, filter);
+    }
+
+  }
+
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java Tue Dec  9 23:02:22 2014
@@ -0,0 +1,242 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMdSelectivity;
+import org.apache.calcite.rel.metadata.RelMdUtil;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.Pair;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+
+import com.google.common.collect.ImmutableMap;
+
+public class HiveRelMdSelectivity extends RelMdSelectivity {
+  public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider.reflectiveSource(
+                                                     BuiltInMethod.SELECTIVITY.method,
+                                                     new HiveRelMdSelectivity());
+
+  protected HiveRelMdSelectivity() {
+    super();
+  }
+
+  public Double getSelectivity(HiveTableScan t, RexNode predicate) {
+    if (predicate != null) {
+      FilterSelectivityEstimator filterSelEstmator = new FilterSelectivityEstimator(t);
+      return filterSelEstmator.estimateSelectivity(predicate);
+    }
+
+    return 1.0;
+  }
+
+  public Double getSelectivity(HiveJoin j, RexNode predicate) {
+    if (j.getJoinType().equals(JoinRelType.INNER)) {
+      return computeInnerJoinSelectivity(j, predicate);
+    }
+    return 1.0;
+  }
+
+  private Double computeInnerJoinSelectivity(HiveJoin j, RexNode predicate) {
+    double ndvCrossProduct = 1;
+    Pair<Boolean, RexNode> predInfo =
+        getCombinedPredicateForJoin(j, predicate);
+    if (!predInfo.getKey()) {
+      return
+          new FilterSelectivityEstimator(j).
+          estimateSelectivity(predInfo.getValue());
+    }
+
+    RexNode combinedPredicate = predInfo.getValue();
+    JoinPredicateInfo jpi = JoinPredicateInfo.constructJoinPredicateInfo(j,
+        combinedPredicate);
+    ImmutableMap.Builder<Integer, Double> colStatMapBuilder = ImmutableMap
+        .builder();
+    ImmutableMap<Integer, Double> colStatMap;
+    int rightOffSet = j.getLeft().getRowType().getFieldCount();
+
+    // 1. Update Col Stats Map with col stats for columns from left side of
+    // Join which are part of join keys
+    for (Integer ljk : jpi.getProjsFromLeftPartOfJoinKeysInChildSchema()) {
+      colStatMapBuilder.put(ljk,
+          HiveRelMdDistinctRowCount.getDistinctRowCount(j.getLeft(), ljk));
+    }
+
+    // 2. Update Col Stats Map with col stats for columns from right side of
+    // Join which are part of join keys
+    for (Integer rjk : jpi.getProjsFromRightPartOfJoinKeysInChildSchema()) {
+      colStatMapBuilder.put(rjk + rightOffSet,
+          HiveRelMdDistinctRowCount.getDistinctRowCount(j.getRight(), rjk));
+    }
+    colStatMap = colStatMapBuilder.build();
+
+    // 3. Walk through the Join Condition Building NDV for selectivity
+    // NDV of the join can not exceed the cardinality of cross join.
+    List<JoinLeafPredicateInfo> peLst = jpi.getEquiJoinPredicateElements();
+    int noOfPE = peLst.size();
+    if (noOfPE > 0) {
+      ndvCrossProduct = exponentialBackoff(peLst, colStatMap);
+
+      if (j.isLeftSemiJoin())
+        ndvCrossProduct = Math.min(RelMetadataQuery.getRowCount(j.getLeft()),
+            ndvCrossProduct);
+      else
+        ndvCrossProduct = Math.min(RelMetadataQuery.getRowCount(j.getLeft())
+            * RelMetadataQuery.getRowCount(j.getRight()), ndvCrossProduct);
+    }
+
+    // 4. Join Selectivity = 1/NDV
+    return (1 / ndvCrossProduct);
+  }
+
+  // 3.2 if conjunctive predicate elements are more than one, then walk
+  // through them one by one. Compute cross product of NDV. Cross product is
+  // computed by multiplying the largest NDV of all of the conjunctive
+  // predicate
+  // elements with degraded NDV of rest of the conjunctive predicate
+  // elements. NDV is
+  // degraded using log function.Finally the ndvCrossProduct is fenced at
+  // the join
+  // cross product to ensure that NDV can not exceed worst case join
+  // cardinality.<br>
+  // NDV of a conjunctive predicate element is the max NDV of all arguments
+  // to lhs, rhs expressions.
+  // NDV(JoinCondition) = min (left cardinality * right cardinality,
+  // ndvCrossProduct(JoinCondition))
+  // ndvCrossProduct(JoinCondition) = ndv(pex)*log(ndv(pe1))*log(ndv(pe2))
+  // where pex is the predicate element of join condition with max ndv.
+  // ndv(pe) = max(NDV(left.Expr), NDV(right.Expr))
+  // NDV(expr) = max(NDV( expr args))
+  protected double logSmoothing(List<JoinLeafPredicateInfo> peLst, ImmutableMap<Integer, Double> colStatMap) {
+    int noOfPE = peLst.size();
+    double ndvCrossProduct = getMaxNDVForJoinSelectivity(peLst.get(0), colStatMap);
+    if (noOfPE > 1) {
+      double maxNDVSoFar = ndvCrossProduct;
+      double ndvToBeSmoothed;
+      double tmpNDV;
+
+      for (int i = 1; i < noOfPE; i++) {
+        tmpNDV = getMaxNDVForJoinSelectivity(peLst.get(i), colStatMap);
+        if (tmpNDV > maxNDVSoFar) {
+          ndvToBeSmoothed = maxNDVSoFar;
+          maxNDVSoFar = tmpNDV;
+          ndvCrossProduct = (ndvCrossProduct / ndvToBeSmoothed) * tmpNDV;
+        } else {
+          ndvToBeSmoothed = tmpNDV;
+        }
+        // TODO: revisit the fence
+        if (ndvToBeSmoothed > 3)
+          ndvCrossProduct *= Math.log(ndvToBeSmoothed);
+        else
+          ndvCrossProduct *= ndvToBeSmoothed;
+      }
+    }
+    return ndvCrossProduct;
+  }
+
+  /*
+   * a) Order predciates based on ndv in reverse order. b) ndvCrossProduct =
+   * ndv(pe0) * ndv(pe1) ^(1/2) * ndv(pe2) ^(1/4) * ndv(pe3) ^(1/8) ...
+   */
+  protected double exponentialBackoff(List<JoinLeafPredicateInfo> peLst,
+      ImmutableMap<Integer, Double> colStatMap) {
+    int noOfPE = peLst.size();
+    List<Double> ndvs = new ArrayList<Double>(noOfPE);
+    for (int i = 0; i < noOfPE; i++) {
+      ndvs.add(getMaxNDVForJoinSelectivity(peLst.get(i), colStatMap));
+    }
+    Collections.sort(ndvs);
+    Collections.reverse(ndvs);
+    double ndvCrossProduct = 1.0;
+    for (int i = 0; i < ndvs.size(); i++) {
+      double n = Math.pow(ndvs.get(i), Math.pow(1 / 2.0, i));
+      ndvCrossProduct *= n;
+    }
+    return ndvCrossProduct;
+  }
+
+  /**
+   * 
+   * @param j
+   * @param additionalPredicate
+   * @return if predicate is the join condition return (true, joinCond)
+   * else return (false, minusPred)
+   */
+  private Pair<Boolean,RexNode> getCombinedPredicateForJoin(HiveJoin j, RexNode additionalPredicate) {
+    RexNode minusPred = RelMdUtil.minusPreds(j.getCluster().getRexBuilder(), additionalPredicate,
+        j.getCondition());
+
+    if (minusPred != null) {
+      List<RexNode> minusList = new ArrayList<RexNode>();
+      minusList.add(j.getCondition());
+      minusList.add(minusPred);
+
+      return new Pair<Boolean,RexNode>(false, minusPred);
+    }
+
+    return new Pair<Boolean,RexNode>(true,j.getCondition());
+  }
+
+  /**
+   * Compute Max NDV to determine Join Selectivity.
+   * 
+   * @param jlpi
+   * @param colStatMap
+   *          Immutable Map of Projection Index (in Join Schema) to Column Stat
+   * @param rightProjOffSet
+   * @return
+   */
+  private static Double getMaxNDVForJoinSelectivity(JoinLeafPredicateInfo jlpi,
+      ImmutableMap<Integer, Double> colStatMap) {
+    Double maxNDVSoFar = 1.0;
+
+    maxNDVSoFar = getMaxNDVFromProjections(colStatMap,
+        jlpi.getProjsFromLeftPartOfJoinKeysInJoinSchema(), maxNDVSoFar);
+    maxNDVSoFar = getMaxNDVFromProjections(colStatMap,
+        jlpi.getProjsFromRightPartOfJoinKeysInJoinSchema(), maxNDVSoFar);
+
+    return maxNDVSoFar;
+  }
+
+  private static Double getMaxNDVFromProjections(Map<Integer, Double> colStatMap,
+      Set<Integer> projectionSet, Double defaultMaxNDV) {
+    Double colNDV = null;
+    Double maxNDVSoFar = defaultMaxNDV;
+
+    for (Integer projIndx : projectionSet) {
+      colNDV = colStatMap.get(projIndx);
+      if (colNDV > maxNDVSoFar)
+        maxNDVSoFar = colNDV;
+    }
+
+    return maxNDVSoFar;
+  }
+  
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java Tue Dec  9 23:02:22 2014
@@ -0,0 +1,138 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.metadata.BuiltInMetadata;
+import org.apache.calcite.rel.metadata.Metadata;
+import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMdUniqueKeys;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.BitSets;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+
+import com.google.common.base.Function;
+
+public class HiveRelMdUniqueKeys {
+
+  public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider
+      .reflectiveSource(BuiltInMethod.UNIQUE_KEYS.method,
+          new HiveRelMdUniqueKeys());
+
+  /*
+   * Infer Uniquenes if: - rowCount(col) = ndv(col) - TBD for numerics: max(col)
+   * - min(col) = rowCount(col)
+   * 
+   * Why are we intercepting Project and not TableScan? Because if we
+   * have a method for TableScan, it will not know which columns to check for.
+   * Inferring Uniqueness for all columns is very expensive right now. The flip
+   * side of doing this is, it only works post Field Trimming.
+   */
+  public Set<ImmutableBitSet> getUniqueKeys(Project rel, boolean ignoreNulls) {
+
+    HiveTableScan tScan = getTableScan(rel.getInput(), false);
+
+    if ( tScan == null ) {
+      Function<RelNode, Metadata> fn = RelMdUniqueKeys.SOURCE.apply(
+          rel.getClass(), BuiltInMetadata.UniqueKeys.class);
+      return ((BuiltInMetadata.UniqueKeys) fn.apply(rel))
+          .getUniqueKeys(ignoreNulls);
+    }
+
+    Map<Integer, Integer> posMap = new HashMap<Integer, Integer>();
+    int projectPos = 0;
+    int colStatsPos = 0;
+
+    BitSet projectedCols = new BitSet();
+    for (RexNode r : rel.getProjects()) {
+      if (r instanceof RexInputRef) {
+        projectedCols.set(((RexInputRef) r).getIndex());
+        posMap.put(colStatsPos, projectPos);
+        colStatsPos++;
+      }
+      projectPos++;
+    }
+
+    double numRows = tScan.getRows();
+    List<ColStatistics> colStats = tScan.getColStat(BitSets
+        .toList(projectedCols));
+    Set<ImmutableBitSet> keys = new HashSet<ImmutableBitSet>();
+
+    colStatsPos = 0;
+    for (ColStatistics cStat : colStats) {
+      boolean isKey = false;
+      if (cStat.getCountDistint() >= numRows) {
+        isKey = true;
+      }
+      if ( !isKey && cStat.getRange() != null &&
+          cStat.getRange().maxValue != null  &&
+          cStat.getRange().minValue != null) {
+        double r = cStat.getRange().maxValue.doubleValue() - 
+            cStat.getRange().minValue.doubleValue() + 1;
+        isKey = (Math.abs(numRows - r) < RelOptUtil.EPSILON);
+      }
+      if ( isKey ) {
+        ImmutableBitSet key = ImmutableBitSet.of(posMap.get(colStatsPos));
+        keys.add(key);
+      }
+      colStatsPos++;
+    }
+
+    return keys;
+  }
+
+  /*
+   * traverse a path of Filter, Projects to get to the TableScan.
+   * In case of Unique keys, stop if you reach a Project, it will be handled
+   * by the invocation on the Project.
+   * In case of getting the base rowCount of a Path, keep going past a Project.
+   */
+  static HiveTableScan getTableScan(RelNode r, boolean traverseProject) {
+
+    while (r != null && !(r instanceof HiveTableScan)) {
+      if (r instanceof HepRelVertex) {
+        r = ((HepRelVertex) r).getCurrentRel();
+      } else if (r instanceof Filter) {
+        r = ((Filter) r).getInput();
+      } else if (traverseProject && r instanceof Project) {
+        r = ((Project) r).getInput();
+      } else {
+        r = null;
+      }
+    }
+    return r == null ? null : (HiveTableScan) r;
+  }
+
+}



Mime
View raw message