hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jpull...@apache.org
Subject svn commit: r1643732 [1/3] - in /hive/trunk: itests/src/test/resources/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Date Mon, 08 Dec 2014 04:42:59 GMT
Author: jpullokk
Date: Mon Dec  8 04:42:59 2014
New Revision: 1643732

URL: http://svn.apache.org/r1643732
Log:
HIVE-8774 Fix groupBy index optimization (Pengcheng Xiong via Laljo John Pullokkaran)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo_1.q
    hive/trunk/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo_2.q
    hive/trunk/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_1.q.out
    hive/trunk/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_2.q.out
Modified:
    hive/trunk/itests/src/test/resources/testconfiguration.properties
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java
    hive/trunk/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out

Modified: hive/trunk/itests/src/test/resources/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/trunk/itests/src/test/resources/testconfiguration.properties?rev=1643732&r1=1643731&r2=1643732&view=diff
==============================================================================
--- hive/trunk/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/trunk/itests/src/test/resources/testconfiguration.properties Mon Dec  8 04:42:59 2014
@@ -31,6 +31,8 @@ minimr.query.files=auto_sortmerge_join_1
   optrstat_groupby.q,\
   parallel_orderby.q,\
   ql_rewrite_gbtoidx.q,\
+  ql_rewrite_gbtoidx_cbo_1.q,\
+  ql_rewrite_gbtoidx_cbo_2.q,\
   quotedid_smb.q,\
   reduce_deduplicate.q,\
   remote_script.q,\

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java?rev=1643732&r1=1643731&r2=1643732&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java Mon Dec  8 04:42:59 2014
@@ -21,17 +21,16 @@ package org.apache.hadoop.hive.ql.optimi
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.Stack;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.metastore.api.Index;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -63,47 +62,31 @@ public final class RewriteCanApplyCtx im
   }
 
   // Rewrite Variables
-  private int aggFuncCnt = 0;
+  private boolean selClauseColsFetchException = false;
   private boolean queryHasGroupBy = false;
   private boolean aggFuncIsNotCount = false;
-  private boolean aggFuncColsFetchException = false;
-  private boolean whrClauseColsFetchException = false;
-  private boolean selClauseColsFetchException = false;
-  private boolean gbyKeysFetchException = false;
-  private boolean countOnAllCols = false;
-  private boolean countOfOne = false;
-  private boolean queryHasMultipleTables = false;
-
-  //Data structures that are populated in the RewriteCanApplyProcFactory
-  //methods to check if the index key meets all criteria
-  private Set<String> selectColumnsList = new LinkedHashSet<String>();
-  private Set<String> predicateColumnsList = new LinkedHashSet<String>();
-  private Set<String> gbKeyNameList = new LinkedHashSet<String>();
-  private Set<String> aggFuncColList = new LinkedHashSet<String>();
+  private boolean aggParameterException = false;
+
+  //The most important variable: the index key, i.e. the single column the query references
+  private String indexKey;
 
   private final ParseContext parseContext;
   private String alias;
   private String baseTableName;
   private String indexTableName;
   private String aggFunction;
+  
+  private TableScanOperator tableScanOperator;
+  private List<SelectOperator> selectOperators;
+  private List<GroupByOperator> groupByOperators;
 
   void resetCanApplyCtx(){
-    setAggFuncCnt(0);
     setQueryHasGroupBy(false);
     setAggFuncIsNotCount(false);
-    setAggFuncColsFetchException(false);
-    setWhrClauseColsFetchException(false);
     setSelClauseColsFetchException(false);
-    setGbyKeysFetchException(false);
-    setCountOnAllCols(false);
-    setCountOfOne(false);
-    setQueryHasMultipleTables(false);
-    selectColumnsList.clear();
-    predicateColumnsList.clear();
-    gbKeyNameList.clear();
-    aggFuncColList.clear();
     setBaseTableName("");
     setAggFunction("");
+    setIndexKey("");
   }
 
   public boolean isQueryHasGroupBy() {
@@ -134,22 +117,6 @@ public final class RewriteCanApplyCtx im
     return aggFunction;
   }
 
-  public void setAggFuncColsFetchException(boolean aggFuncColsFetchException) {
-    this.aggFuncColsFetchException = aggFuncColsFetchException;
-  }
-
-  public boolean isAggFuncColsFetchException() {
-    return aggFuncColsFetchException;
-  }
-
-  public void setWhrClauseColsFetchException(boolean whrClauseColsFetchException) {
-    this.whrClauseColsFetchException = whrClauseColsFetchException;
-  }
-
-  public boolean isWhrClauseColsFetchException() {
-    return whrClauseColsFetchException;
-  }
-
   public void setSelClauseColsFetchException(boolean selClauseColsFetchException) {
     this.selClauseColsFetchException = selClauseColsFetchException;
   }
@@ -158,78 +125,6 @@ public final class RewriteCanApplyCtx im
     return selClauseColsFetchException;
   }
 
-  public void setGbyKeysFetchException(boolean gbyKeysFetchException) {
-    this.gbyKeysFetchException = gbyKeysFetchException;
-  }
-
-  public boolean isGbyKeysFetchException() {
-    return gbyKeysFetchException;
-  }
-
-  public void setCountOnAllCols(boolean countOnAllCols) {
-    this.countOnAllCols = countOnAllCols;
-  }
-
-  public boolean isCountOnAllCols() {
-    return countOnAllCols;
-  }
-
-  public void setCountOfOne(boolean countOfOne) {
-    this.countOfOne = countOfOne;
-  }
-
-  public boolean isCountOfOne() {
-    return countOfOne;
-  }
-
-  public void setQueryHasMultipleTables(boolean queryHasMultipleTables) {
-    this.queryHasMultipleTables = queryHasMultipleTables;
-  }
-
-  public boolean isQueryHasMultipleTables() {
-    return queryHasMultipleTables;
-  }
-
-  public Set<String> getSelectColumnsList() {
-    return selectColumnsList;
-  }
-
-  public void setSelectColumnsList(Set<String> selectColumnsList) {
-    this.selectColumnsList = selectColumnsList;
-  }
-
-  public Set<String> getPredicateColumnsList() {
-    return predicateColumnsList;
-  }
-
-  public void setPredicateColumnsList(Set<String> predicateColumnsList) {
-    this.predicateColumnsList = predicateColumnsList;
-  }
-
-  public Set<String> getGbKeyNameList() {
-    return gbKeyNameList;
-  }
-
-  public void setGbKeyNameList(Set<String> gbKeyNameList) {
-    this.gbKeyNameList = gbKeyNameList;
-  }
-
-  public Set<String> getAggFuncColList() {
-    return aggFuncColList;
-  }
-
-  public void setAggFuncColList(Set<String> aggFuncColList) {
-    this.aggFuncColList = aggFuncColList;
-  }
-
-   public int getAggFuncCnt() {
-    return aggFuncCnt;
-  }
-
-  public void setAggFuncCnt(int aggFuncCnt) {
-    this.aggFuncCnt = aggFuncCnt;
-  }
-
   public String getAlias() {
     return alias;
   }
@@ -258,15 +153,6 @@ public final class RewriteCanApplyCtx im
     return parseContext;
   }
 
-  public Set<String> getAllColumns() {
-    Set<String> allColumns = new LinkedHashSet<String>(selectColumnsList);
-    allColumns.addAll(predicateColumnsList);
-    allColumns.addAll(gbKeyNameList);
-    allColumns.addAll(aggFuncColList);
-    return allColumns;
-  }
-
-
   /**
    * This method walks all the nodes starting from topOp TableScanOperator node
    * and invokes methods from {@link RewriteCanApplyProcFactory} for each of the rules
@@ -282,10 +168,14 @@ public final class RewriteCanApplyCtx im
   void populateRewriteVars(TableScanOperator topOp)
     throws SemanticException{
     Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
-    opRules.put(new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"),
-        RewriteCanApplyProcFactory.canApplyOnFilterOperator(topOp));
-    opRules.put(new RuleRegExp("R2", GroupByOperator.getOperatorName() + "%"),
-        RewriteCanApplyProcFactory.canApplyOnGroupByOperator(topOp));
+    //^TS%[(SEL%)|(FIL%)]*GBY%[(FIL%)]*RS%[(FIL%)]*GBY%
+    opRules.put(
+        new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%[("
+            + SelectOperator.getOperatorName() + "%)|(" + FilterOperator.getOperatorName() + "%)]*"
+            + GroupByOperator.getOperatorName() + "%[" + FilterOperator.getOperatorName() + "%]*"
+            + ReduceSinkOperator.getOperatorName() + "%[" + FilterOperator.getOperatorName()
+            + "%]*" + GroupByOperator.getOperatorName() + "%"),
+        RewriteCanApplyProcFactory.canApplyOnTableScanOperator(topOp));
 
     // The dispatcher fires the processor corresponding to the closest matching
     // rule and passes the context along
@@ -323,67 +213,53 @@ public final class RewriteCanApplyCtx im
 
   //Map for base table to index table mapping
   //TableScan operator for base table will be modified to read from index table
-  private final Map<String, String> baseToIdxTableMap =
-    new HashMap<String, String>();;
-
+  private final Map<String, String> baseToIdxTableMap = new HashMap<String, String>();;
 
   public void addTable(String baseTableName, String indexTableName) {
-     baseToIdxTableMap.put(baseTableName, indexTableName);
-   }
+    baseToIdxTableMap.put(baseTableName, indexTableName);
+  }
 
-   public String findBaseTable(String baseTableName)  {
-     return baseToIdxTableMap.get(baseTableName);
-   }
-
-
-  boolean isIndexUsableForQueryBranchRewrite(Index index, Set<String> indexKeyNames){
-
-    //--------------------------------------------
-    //Check if all columns in select list are part of index key columns
-    if (!indexKeyNames.containsAll(selectColumnsList)) {
-      LOG.info("Select list has non index key column : " +
-          " Cannot use index " + index.getIndexName());
-      return false;
-    }
+  public String findBaseTable(String baseTableName) {
+    return baseToIdxTableMap.get(baseTableName);
+  }
 
-    //--------------------------------------------
-    // Check if all columns in where predicate are part of index key columns
-    if (!indexKeyNames.containsAll(predicateColumnsList)) {
-      LOG.info("Predicate column ref list has non index key column : " +
-          " Cannot use index  " + index.getIndexName());
-      return false;
-    }
+  public String getIndexKey() {
+    return indexKey;
+  }
 
-      //--------------------------------------------
-      // For group by, we need to check if all keys are from index columns
-      // itself. Here GB key order can be different than index columns but that does
-      // not really matter for final result.
-      if (!indexKeyNames.containsAll(gbKeyNameList)) {
-        LOG.info("Group by key has some non-indexed columns, " +
-            " Cannot use index  " + index.getIndexName());
-        return false;
-      }
+  public void setIndexKey(String indexKey) {
+    this.indexKey = indexKey;
+  }
 
-      // If we have agg function (currently only COUNT is supported), check if its inputs are
-      // from index. we currently support only that.
-      if (aggFuncColList.size() > 0)  {
-        if (!indexKeyNames.containsAll(aggFuncColList)){
-          LOG.info("Agg Func input is not present in index key columns. Currently " +
-              "only agg func on index columns are supported by rewrite optimization");
-          return false;
-        }
-      }
+  public TableScanOperator getTableScanOperator() {
+    return tableScanOperator;
+  }
 
-    //Now that we are good to do this optimization, set parameters in context
-    //which would be used by transformation procedure as inputs.
-    if(queryHasGroupBy
-        && aggFuncCnt == 1
-        && !aggFuncIsNotCount){
-      addTable(baseTableName, index.getIndexTableName());
-    }else{
-      LOG.info("No valid criteria met to apply rewrite.");
-      return false;
-    }
-    return true;
+  public void setTableScanOperator(TableScanOperator tableScanOperator) {
+    this.tableScanOperator = tableScanOperator;
+  }
+
+  public List<SelectOperator> getSelectOperators() {
+    return selectOperators;
+  }
+
+  public void setSelectOperators(List<SelectOperator> selectOperators) {
+    this.selectOperators = selectOperators;
+  }
+
+  public List<GroupByOperator> getGroupByOperators() {
+    return groupByOperators;
+  }
+
+  public void setGroupByOperators(List<GroupByOperator> groupByOperators) {
+    this.groupByOperators = groupByOperators;
+  }
+
+  public void setAggParameterException(boolean aggParameterException) {
+    this.aggParameterException = aggParameterException;
+  }
+
+  public boolean isAggParameterException() {
+    return aggParameterException;
   }
 }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java?rev=1643732&r1=1643731&r2=1643732&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java Mon Dec  8 04:42:59 2014
@@ -18,8 +18,9 @@
 
 package org.apache.hadoop.hive.ql.optimizer.index;
 
-import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
@@ -27,13 +28,12 @@ import org.apache.hadoop.hive.ql.lib.Nod
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.FilterDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Stack;
 
@@ -43,154 +43,74 @@ import java.util.Stack;
  *
  */
 public final class RewriteCanApplyProcFactory {
+  public static CheckTableScanProc canApplyOnTableScanOperator(TableScanOperator topOp) {
+    return new CheckTableScanProc();
+  }
 
-  /**
-   * Check for conditions in FilterOperator that do not meet rewrite criteria.
-   */
-  private static class CheckFilterProc implements NodeProcessor {
-
-    private TableScanOperator topOp;
-
-    public CheckFilterProc(TableScanOperator topOp) {
-      this.topOp = topOp;
+  private static class CheckTableScanProc implements NodeProcessor {
+    public CheckTableScanProc() {
     }
 
-    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
-        Object... nodeOutputs) throws SemanticException {
-      FilterOperator operator = (FilterOperator)nd;
-      RewriteCanApplyCtx canApplyCtx = (RewriteCanApplyCtx)ctx;
-      FilterDesc conf = operator.getConf();
-      //The filter operator should have a predicate of ExprNodeGenericFuncDesc type.
-      //This represents the comparison operator
-      ExprNodeDesc oldengfd = conf.getPredicate();
-      if(oldengfd == null){
-        canApplyCtx.setWhrClauseColsFetchException(true);
-        return null;
-      }
-      ExprNodeDesc backtrack = ExprNodeDescUtils.backtrack(oldengfd, operator, topOp);
-      if (backtrack == null) {
-        canApplyCtx.setWhrClauseColsFetchException(true);
-        return null;
-      }
-      //Add the predicate columns to RewriteCanApplyCtx's predColRefs list to check later
-      //if index keys contain all filter predicate columns and vice-a-versa
-      for (String col : backtrack.getCols()) {
-        canApplyCtx.getPredicateColumnsList().add(col);
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
+        throws SemanticException {
+      RewriteCanApplyCtx canApplyCtx = (RewriteCanApplyCtx) ctx;
+      for (Node node : stack) {
+        // For table scan operator,
+        // check ReferencedColumns to make sure that only the index column is
+        // selected for the following operators.
+        if (node instanceof TableScanOperator) {
+          TableScanOperator ts = (TableScanOperator) node;
+          canApplyCtx.setTableScanOperator(ts);
+          List<String> selectColumns = ts.getConf().getReferencedColumns();
+          if (selectColumns == null || selectColumns.size() != 1) {
+            canApplyCtx.setSelClauseColsFetchException(true);
+            return null;
+          } else {
+            canApplyCtx.setIndexKey(selectColumns.get(0));
+          }
+        } else if (node instanceof SelectOperator) {
+          // For select operators in the stack, we just add them
+          if (canApplyCtx.getSelectOperators() == null) {
+            canApplyCtx.setSelectOperators(new ArrayList<SelectOperator>());
+          }
+          canApplyCtx.getSelectOperators().add((SelectOperator) node);
+        } else if (node instanceof GroupByOperator) {
+          if (canApplyCtx.getGroupByOperators() == null) {
+            canApplyCtx.setGroupByOperators(new ArrayList<GroupByOperator>());
+          }
+          // According to the pre-order,
+          // the first GroupbyOperator is the one before RS
+          // and the second one is the one after RS
+          GroupByOperator operator = (GroupByOperator) node;
+          canApplyCtx.getGroupByOperators().add(operator);
+          if (!canApplyCtx.isQueryHasGroupBy()) {
+            canApplyCtx.setQueryHasGroupBy(true);
+            GroupByDesc conf = operator.getConf();
+            List<AggregationDesc> aggrList = conf.getAggregators();
+            if (aggrList == null || aggrList.size() != 1
+                || !("count".equals(aggrList.get(0).getGenericUDAFName()))) {
+              // The current implementation supports exactly one aggregate
+              // function, and it must be count
+              canApplyCtx.setAggFuncIsNotCount(true);
+              return null;
+            } else {
+              List<ExprNodeDesc> para = aggrList.get(0).getParameters();
+              if (para == null || para.size() == 0 || para.size() > 1) {
+                canApplyCtx.setAggParameterException(true);
+                return null;
+              } else {
+                ExprNodeDesc expr = ExprNodeDescUtils.backtrack(para.get(0), operator,
+                    (Operator<OperatorDesc>) stack.get(0));
+                if (!(expr instanceof ExprNodeColumnDesc)) {
+                  canApplyCtx.setAggParameterException(true);
+                  return null;
+                }
+              }
+            }
+          }
+        }
       }
       return null;
     }
   }
-
- public static CheckFilterProc canApplyOnFilterOperator(TableScanOperator topOp) {
-    return new CheckFilterProc(topOp);
-  }
-
-   /**
-   * Check for conditions in GroupByOperator that do not meet rewrite criteria.
-   *
-   */
-  private static class CheckGroupByProc implements NodeProcessor {
-
-     private TableScanOperator topOp;
-
-     public CheckGroupByProc(TableScanOperator topOp) {
-       this.topOp = topOp;
-     }
-
-     public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
-         Object... nodeOutputs) throws SemanticException {
-       GroupByOperator operator = (GroupByOperator)nd;
-       RewriteCanApplyCtx canApplyCtx = (RewriteCanApplyCtx)ctx;
-       //for each group-by clause in query, only one GroupByOperator of the
-       //GBY-RS-GBY sequence is stored in  getGroupOpToInputTables
-       //we need to process only this operator
-       //Also, we do not rewrite for cases when same query branch has multiple group-by constructs
-       if(canApplyCtx.getParseContext().getGroupOpToInputTables().containsKey(operator) &&
-           !canApplyCtx.isQueryHasGroupBy()){
-
-         canApplyCtx.setQueryHasGroupBy(true);
-         GroupByDesc conf = operator.getConf();
-         List<AggregationDesc> aggrList = conf.getAggregators();
-         if(aggrList != null && aggrList.size() > 0){
-             for (AggregationDesc aggregationDesc : aggrList) {
-               canApplyCtx.setAggFuncCnt(canApplyCtx.getAggFuncCnt() + 1);
-               //In the current implementation, we do not support more than 1 agg funcs in group-by
-               if(canApplyCtx.getAggFuncCnt() > 1) {
-                 return false;
-               }
-               String aggFunc = aggregationDesc.getGenericUDAFName();
-               if(!("count".equals(aggFunc))){
-                 canApplyCtx.setAggFuncIsNotCount(true);
-                 return false;
-               }
-               List<ExprNodeDesc> para = aggregationDesc.getParameters();
-               //for a valid aggregation, it needs to have non-null parameter list
-               if (para == null) {
-                 canApplyCtx.setAggFuncColsFetchException(true);
-               } else if (para.size() == 0) {
-                 //count(*) case
-                 canApplyCtx.setCountOnAllCols(true);
-                 canApplyCtx.setAggFunction("_count_of_all");
-               } else if (para.size() == 1) {
-                 ExprNodeDesc expr = ExprNodeDescUtils.backtrack(para.get(0), operator, topOp);
-                 if (expr instanceof ExprNodeColumnDesc){
-                   //Add the columns to RewriteCanApplyCtx's selectColumnsList list
-                   //to check later if index keys contain all select clause columns
-                   //and vice-a-versa. We get the select column 'actual' names only here
-                   //if we have a agg func along with group-by
-                   //SelectOperator has internal names in its colList data structure
-                   canApplyCtx.getSelectColumnsList().add(
-                       ((ExprNodeColumnDesc) expr).getColumn());
-                   //Add the columns to RewriteCanApplyCtx's aggFuncColList list to check later
-                   //if columns contained in agg func are index key columns
-                   canApplyCtx.getAggFuncColList().add(
-                       ((ExprNodeColumnDesc) expr).getColumn());
-                   canApplyCtx.setAggFunction("_count_of_" +
-                       ((ExprNodeColumnDesc) expr).getColumn() + "");
-                 } else if(expr instanceof ExprNodeConstantDesc) {
-                   //count(1) case
-                   canApplyCtx.setCountOfOne(true);
-                   canApplyCtx.setAggFunction("_count_of_1");
-                 }
-               } else {
-                 throw new SemanticException("Invalid number of arguments for count");
-               }
-             }
-         }
-
-         //we need to have non-null group-by keys for a valid group-by operator
-         List<ExprNodeDesc> keyList = conf.getKeys();
-         if(keyList == null || keyList.size() == 0){
-           canApplyCtx.setGbyKeysFetchException(true);
-         }
-         for (ExprNodeDesc expr : keyList) {
-           checkExpression(canApplyCtx, expr);
-         }
-       }
-       return null;
-     }
-
-     private void checkExpression(RewriteCanApplyCtx canApplyCtx, ExprNodeDesc expr){
-       if(expr instanceof ExprNodeColumnDesc){
-         //Add the group-by keys to RewriteCanApplyCtx's gbKeyNameList list to check later
-         //if all keys are from index columns
-         canApplyCtx.getGbKeyNameList().addAll(expr.getCols());
-       }else if(expr instanceof ExprNodeGenericFuncDesc){
-         ExprNodeGenericFuncDesc funcExpr = (ExprNodeGenericFuncDesc)expr;
-         List<ExprNodeDesc> childExprs = funcExpr.getChildren();
-         for (ExprNodeDesc childExpr : childExprs) {
-           if(childExpr instanceof ExprNodeColumnDesc){
-             canApplyCtx.getGbKeyNameList().addAll(expr.getCols());
-             canApplyCtx.getSelectColumnsList().add(((ExprNodeColumnDesc) childExpr).getColumn());
-           }else if(childExpr instanceof ExprNodeGenericFuncDesc){
-             checkExpression(canApplyCtx, childExpr);
-           }
-         }
-       }
-     }
-   }
-
-   public static CheckGroupByProc canApplyOnGroupByOperator(TableScanOperator topOp) {
-     return new CheckGroupByProc(topOp);
-   }
 }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java?rev=1643732&r1=1643731&r2=1643732&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java Mon Dec  8 04:42:59 2014
@@ -21,10 +21,7 @@ package org.apache.hadoop.hive.ql.optimi
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
 import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -49,7 +46,6 @@ import org.apache.hadoop.hive.ql.parse.O
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-import org.apache.hadoop.util.StringUtils;
 
 
 /**
@@ -153,10 +149,6 @@ public class RewriteGBUsingIndex impleme
    * @throws SemanticException
    */
   boolean shouldApplyOptimization() throws SemanticException {
-    if (ifQueryHasMultipleTables()) {
-      //We do not apply this optimization for this case as of now.
-      return false;
-    }
     Map<Table, List<Index>> tableToIndex = getIndexesForRewrite();
     if (tableToIndex.isEmpty()) {
       LOG.debug("No Valid Index Found to apply Rewrite, " +
@@ -170,19 +162,14 @@ public class RewriteGBUsingIndex impleme
      * the tsOpToProcess to apply rewrite later on.
      * */
     Map<TableScanOperator, Table> topToTable = parseContext.getTopToTable();
-    Map<String, Operator<?>> topOps = parseContext.getTopOps();
-
     for (Map.Entry<String, Operator<?>> entry : parseContext.getTopOps().entrySet()) {
-
       String alias = entry.getKey();
       TableScanOperator topOp = (TableScanOperator) entry.getValue();
-
       Table table = topToTable.get(topOp);
       List<Index> indexes = tableToIndex.get(table);
       if (indexes.isEmpty()) {
         continue;
       }
-
       if (table.isPartitioned()) {
         //if base table has partitions, we need to check if index is built for
         //all partitions. If not, then we do not apply the optimization
@@ -196,7 +183,6 @@ public class RewriteGBUsingIndex impleme
       //if there are no partitions on base table
       checkIfRewriteCanBeApplied(alias, topOp, table, indexes);
     }
-
     return !tsOpToProcess.isEmpty();
   }
 
@@ -213,26 +199,21 @@ public class RewriteGBUsingIndex impleme
       Table baseTable, List<Index> indexes) throws SemanticException{
     //Context for checking if this optimization can be applied to the input query
     RewriteCanApplyCtx canApplyCtx = RewriteCanApplyCtx.getInstance(parseContext);
-
     canApplyCtx.setAlias(alias);
     canApplyCtx.setBaseTableName(baseTable.getTableName());
     canApplyCtx.populateRewriteVars(topOp);
-
-    Map<Index, Set<String>> indexTableMap = getIndexToKeysMap(indexes);
-    for (Map.Entry<Index, Set<String>> entry : indexTableMap.entrySet()) {
+    Map<Index, String> indexTableMap = getIndexToKeysMap(indexes);
+    for (Map.Entry<Index, String> entry : indexTableMap.entrySet()) {
       //we rewrite the original query using the first valid index encountered
       //this can be changed if we have a better mechanism to
       //decide which index will produce a better rewrite
       Index index = entry.getKey();
-      Set<String> indexKeyNames = entry.getValue();
+      String indexKeyName = entry.getValue();
       //break here if any valid index is found to apply rewrite
-      if (canApplyCtx.isIndexUsableForQueryBranchRewrite(index, indexKeyNames) &&
-          checkIfAllRewriteCriteriaIsMet(canApplyCtx)) {
-        //check if aggregation function is set.
-        //If not, set it using the only indexed column
-        if (canApplyCtx.getAggFunction() == null) {
-          canApplyCtx.setAggFunction("_count_of_" + StringUtils.join(",", indexKeyNames) + "");
-        }
+      if (canApplyCtx.getIndexKey() != null && canApplyCtx.getIndexKey().equals(indexKeyName)
+          && checkIfAllRewriteCriteriaIsMet(canApplyCtx)) {
+        canApplyCtx.setAggFunction("_count_of_" + indexKeyName + "");
+        canApplyCtx.addTable(canApplyCtx.getBaseTableName(), index.getIndexTableName());
         canApplyCtx.setIndexTableName(index.getIndexTableName());
         tsOpToProcess.put(alias, canApplyCtx);
         return true;
@@ -242,27 +223,6 @@ public class RewriteGBUsingIndex impleme
   }
 
   /**
-   * This block of code iterates over the topToTable map from ParseContext
-   * to determine if the query has a scan over multiple tables.
-   * @return
-   */
-  boolean ifQueryHasMultipleTables(){
-    Map<TableScanOperator, Table> topToTable = parseContext.getTopToTable();
-    Iterator<Table> valuesItr = topToTable.values().iterator();
-    Set<String> tableNameSet = new HashSet<String>();
-    while(valuesItr.hasNext()){
-      Table table = valuesItr.next();
-      tableNameSet.add(table.getTableName());
-    }
-    if(tableNameSet.size() > 1){
-      LOG.debug("Query has more than one table " +
-          "that is not supported with " + getName() + " optimization.");
-      return true;
-    }
-    return false;
-  }
-
-  /**
    * Get a list of indexes which can be used for rewrite.
    * @return
    * @throws SemanticException
@@ -319,19 +279,16 @@ public class RewriteGBUsingIndex impleme
    * @return
    * @throws SemanticException
    */
-  Map<Index, Set<String>> getIndexToKeysMap(List<Index> indexTables) throws SemanticException{
+  Map<Index, String> getIndexToKeysMap(List<Index> indexTables) throws SemanticException{
     Hive hiveInstance = hiveDb;
-    Map<Index, Set<String>> indexToKeysMap = new LinkedHashMap<Index, Set<String>>();
+    Map<Index, String> indexToKeysMap = new LinkedHashMap<Index, String>();
      for (int idxCtr = 0; idxCtr < indexTables.size(); idxCtr++)  {
-      final Set<String> indexKeyNames = new LinkedHashSet<String>();
       Index index = indexTables.get(idxCtr);
        //Getting index key columns
       StorageDescriptor sd = index.getSd();
       List<FieldSchema> idxColList = sd.getCols();
-      for (FieldSchema fieldSchema : idxColList) {
-        indexKeyNames.add(fieldSchema.getName());
-      }
-      assert indexKeyNames.size()==1;
+      assert idxColList.size()==1;
+      String indexKeyName = idxColList.get(0).getName();
       // Check that the index schema is as expected. This code block should
       // catch problems of this rewrite breaking when the AggregateIndexHandler
       // index is changed.
@@ -355,7 +312,7 @@ public class RewriteGBUsingIndex impleme
       // and defer the decision of using a particular index for later
       // this is to allow choosing a index if a better mechanism is
       // designed later to chose a better rewrite
-      indexToKeysMap.put(index, indexKeyNames);
+      indexToKeysMap.put(index, indexKeyName);
     }
     return indexToKeysMap;
   }
@@ -366,20 +323,11 @@ public class RewriteGBUsingIndex impleme
    * @throws SemanticException
    *
    */
-  @SuppressWarnings("unchecked")
   private void rewriteOriginalQuery() throws SemanticException {
-    Map<String, Operator<?>> topOpMap = parseContext.getTopOps();
-    Iterator<String> tsOpItr = tsOpToProcess.keySet().iterator();
-
-    for (Map.Entry<String, RewriteCanApplyCtx> entry : tsOpToProcess.entrySet()) {
-      String alias = entry.getKey();
-      RewriteCanApplyCtx canApplyCtx = entry.getValue();
-      TableScanOperator topOp = (TableScanOperator) topOpMap.get(alias);
+    for (RewriteCanApplyCtx canApplyCtx : tsOpToProcess.values()) {
       RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx =
-        RewriteQueryUsingAggregateIndexCtx.getInstance(parseContext, hiveDb,
-            canApplyCtx.getIndexTableName(), canApplyCtx.getAlias(),
-            canApplyCtx.getAllColumns(), canApplyCtx.getAggFunction());
-      rewriteQueryCtx.invokeRewriteQueryProc(topOp);
+          RewriteQueryUsingAggregateIndexCtx.getInstance(parseContext, hiveDb, canApplyCtx);
+      rewriteQueryCtx.invokeRewriteQueryProc();
       parseContext = rewriteQueryCtx.getParseContext();
       parseContext.setOpParseCtx((LinkedHashMap<Operator<? extends OperatorDesc>,
           OpParseContext>) rewriteQueryCtx.getOpc());
@@ -392,45 +340,20 @@ public class RewriteGBUsingIndex impleme
    * This method logs the reason for which we cannot apply the rewrite optimization.
    * @return
    */
-  boolean checkIfAllRewriteCriteriaIsMet(RewriteCanApplyCtx canApplyCtx){
-    if (canApplyCtx.getAggFuncCnt() > 1){
-      LOG.debug("More than 1 agg funcs: " +
-          "Not supported by " + getName() + " optimization.");
-      return false;
-    }
-    if (canApplyCtx.isAggFuncIsNotCount()){
-      LOG.debug("Agg func other than count is " +
-          "not supported by " + getName() + " optimization.");
-      return false;
-    }
-    if (canApplyCtx.isCountOnAllCols()){
-      LOG.debug("Currently count function needs group by on key columns. This is a count(*) case.,"
-          + "Cannot apply this " + getName() + " optimization.");
-      return false;
-    }
-    if (canApplyCtx.isCountOfOne()){
-      LOG.debug("Currently count function needs group by on key columns. This is a count(1) case.,"
-          + "Cannot apply this " + getName() + " optimization.");
-      return false;
-    }
-    if (canApplyCtx.isAggFuncColsFetchException()){
-      LOG.debug("Got exception while locating child col refs " +
-          "of agg func, skipping " + getName() + " optimization.");
-      return false;
-    }
-    if (canApplyCtx.isWhrClauseColsFetchException()){
-      LOG.debug("Got exception while locating child col refs for where clause, "
-          + "skipping " + getName() + " optimization.");
+  boolean checkIfAllRewriteCriteriaIsMet(RewriteCanApplyCtx canApplyCtx) {
+    if (canApplyCtx.isSelClauseColsFetchException()) {
+      LOG.debug("Got exception while locating child col refs for select list, " + "skipping "
+          + getName() + " optimization.");
       return false;
     }
-    if (canApplyCtx.isSelClauseColsFetchException()){
-      LOG.debug("Got exception while locating child col refs for select list, "
-          + "skipping " + getName() + " optimization.");
+    if (canApplyCtx.isAggFuncIsNotCount()) {
+      LOG.debug("Agg func other than count is " + "not supported by " + getName()
+          + " optimization.");
       return false;
     }
-    if (canApplyCtx.isGbyKeysFetchException()){
-      LOG.debug("Got exception while locating child col refs for GroupBy key, "
-          + "skipping " + getName() + " optimization.");
+    if (canApplyCtx.isAggParameterException()) {
+      LOG.debug("Got exception while locating parameter refs for aggregation, " + "skipping "
+          + getName() + " optimization.");
       return false;
     }
     return true;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java?rev=1643732&r1=1643731&r2=1643732&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java Mon Dec  8 04:42:59 2014
@@ -19,32 +19,46 @@
 package org.apache.hadoop.hive.ql.optimizer.index;
 
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import java.util.Stack;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
-import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
-import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
-import org.apache.hadoop.hive.ql.lib.Dispatcher;
-import org.apache.hadoop.hive.ql.lib.GraphWalker;
-import org.apache.hadoop.hive.ql.lib.Node;
-import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
-import org.apache.hadoop.hive.ql.lib.Rule;
-import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.ColumnPrunerProcFactory;
 import org.apache.hadoop.hive.ql.parse.OpParseContext;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.RowResolver;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 
 /**
  * RewriteQueryUsingAggregateIndexCtx class stores the
@@ -53,37 +67,37 @@ import org.apache.hadoop.hive.ql.udf.gen
  */
 
 public final class RewriteQueryUsingAggregateIndexCtx  implements NodeProcessorCtx {
-
+  private static final Log LOG = LogFactory.getLog(RewriteQueryUsingAggregateIndexCtx.class.getName());
   private RewriteQueryUsingAggregateIndexCtx(ParseContext parseContext, Hive hiveDb,
-      String indexTableName, String alias, Set<String> columns, String aggregateFunction) {
+      RewriteCanApplyCtx canApplyCtx) {
     this.parseContext = parseContext;
     this.hiveDb = hiveDb;
-    this.indexTableName = indexTableName;
-    this.alias = alias;
-    this.aggregateFunction = aggregateFunction;
-    this.columns = columns;
+    this.canApplyCtx = canApplyCtx;
+    this.indexTableName = canApplyCtx.getIndexTableName();
+    this.alias = canApplyCtx.getAlias();
+    this.aggregateFunction = canApplyCtx.getAggFunction();
     this.opc = parseContext.getOpParseCtx();
+    this.indexKey = canApplyCtx.getIndexKey();
   }
 
   public static RewriteQueryUsingAggregateIndexCtx getInstance(ParseContext parseContext,
-      Hive hiveDb, String indexTableName, String alias,
-      Set<String> columns, String aggregateFunction) {
+      Hive hiveDb, RewriteCanApplyCtx canApplyCtx) {
     return new RewriteQueryUsingAggregateIndexCtx(
-        parseContext, hiveDb, indexTableName, alias, columns, aggregateFunction);
+        parseContext, hiveDb, canApplyCtx);
   }
 
-
   private Map<Operator<? extends OperatorDesc>, OpParseContext> opc =
     new LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext>();
   private final Hive hiveDb;
   private final ParseContext parseContext;
+  private RewriteCanApplyCtx canApplyCtx;
   //We need the GenericUDAFEvaluator for GenericUDAF function "sum"
   private GenericUDAFEvaluator eval = null;
   private final String indexTableName;
   private final String alias;
   private final String aggregateFunction;
-  private final Set<String> columns;
   private ExprNodeColumnDesc aggrExprNode = null;
+  private String indexKey;
 
   public Map<Operator<? extends OperatorDesc>, OpParseContext> getOpc() {
     return opc;
@@ -117,54 +131,6 @@ public final class RewriteQueryUsingAggr
     return aggrExprNode;
   }
 
- /**
-  * Walk the original operator tree using the {@link DefaultGraphWalker} using the rules.
-  * Each of the rules invoke respective methods from the {@link RewriteQueryUsingAggregateIndex}
-  * to rewrite the original query using aggregate index.
-  *
-  * @param topOp
-  * @throws SemanticException
-  */
-  public void invokeRewriteQueryProc(
-      Operator<? extends OperatorDesc> topOp) throws SemanticException{
-    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
-
-    // replace scan operator containing original table with index table
-    opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%"),
-        RewriteQueryUsingAggregateIndex.getReplaceTableScanProc());
-    //rule that replaces index key selection with
-    //sum(`_count_of_indexed_column`) function in original query
-    opRules.put(new RuleRegExp("R2", SelectOperator.getOperatorName() + "%"),
-        RewriteQueryUsingAggregateIndex.getNewQuerySelectSchemaProc());
-    //Manipulates the ExprNodeDesc from GroupByOperator aggregation list
-    opRules.put(new RuleRegExp("R3", GroupByOperator.getOperatorName() + "%"),
-        RewriteQueryUsingAggregateIndex.getNewQueryGroupbySchemaProc());
-
-    // The dispatcher fires the processor corresponding to the closest matching
-    // rule and passes the context along
-    Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, this);
-    GraphWalker ogw = new DefaultGraphWalker(disp);
-
-    // Create a list of topop nodes
-    List<Node> topNodes = new ArrayList<Node>();
-    topNodes.add(topOp);
-    ogw.startWalking(topNodes, null);
-  }
-
- /**
-  * Default procedure for {@link DefaultRuleDispatcher}.
-  * @return
-  */
-  private NodeProcessor getDefaultProc() {
-    return new NodeProcessor() {
-      @Override
-      public Object process(Node nd, Stack<Node> stack,
-          NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
-        return null;
-      }
-    };
-  }
-
   public String getAlias() {
     return alias;
   }
@@ -173,7 +139,215 @@ public final class RewriteQueryUsingAggr
     return aggregateFunction;
   }
 
-  public Set<String> getColumns() {
-    return columns;
+  public String getIndexKey() {
+    return indexKey;
+  }
+
+  public void setIndexKey(String indexKey) {
+    this.indexKey = indexKey;
+  }
+
+  public void invokeRewriteQueryProc() throws SemanticException {
+    this.replaceTableScanProcess(canApplyCtx.getTableScanOperator());
+    //We need aggrExprNode. Thus, replaceGroupByOperatorProcess should come before replaceSelectOperatorProcess
+    for (int index = 0; index < canApplyCtx.getGroupByOperators().size(); index++) {
+      this.replaceGroupByOperatorProcess(canApplyCtx.getGroupByOperators().get(index), index);
+    }
+    for (SelectOperator selectperator : canApplyCtx.getSelectOperators()) {
+      this.replaceSelectOperatorProcess(selectperator);
+    }
+  }
+  
+  /**
+   * This method replaces the original TableScanOperator with the new
+   * TableScanOperator and metadata that scans over the index table rather than
+   * scanning over the original table.
+   *
+   */
+  private void replaceTableScanProcess(TableScanOperator scanOperator) throws SemanticException {
+    RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = this;
+    String alias = rewriteQueryCtx.getAlias();
+
+    // Need to remove the original TableScanOperators from these data structures
+    // and add new ones
+    Map<TableScanOperator, Table> topToTable = rewriteQueryCtx.getParseContext().getTopToTable();
+    Map<String, Operator<? extends OperatorDesc>> topOps = rewriteQueryCtx.getParseContext()
+        .getTopOps();
+    Map<Operator<? extends OperatorDesc>, OpParseContext> opParseContext = rewriteQueryCtx
+        .getParseContext().getOpParseCtx();
+
+    // need this to set rowResolver for new scanOperator
+    OpParseContext operatorContext = opParseContext.get(scanOperator);
+
+    // remove original TableScanOperator
+    topOps.remove(alias);
+    topToTable.remove(scanOperator);
+    opParseContext.remove(scanOperator);
+
+    // construct a new descriptor for the index table scan
+    TableScanDesc indexTableScanDesc = new TableScanDesc();
+    indexTableScanDesc.setGatherStats(false);
+
+    String indexTableName = rewriteQueryCtx.getIndexName();
+    Table indexTableHandle = null;
+    try {
+      indexTableHandle = rewriteQueryCtx.getHiveDb().getTable(indexTableName);
+    } catch (HiveException e) {
+      LOG.error("Error while getting the table handle for index table.");
+      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
+      throw new SemanticException(e.getMessage(), e);
+    }
+
+    String k = indexTableName + Path.SEPARATOR;
+    indexTableScanDesc.setStatsAggPrefix(k);
+    scanOperator.setConf(indexTableScanDesc);
+
+    // Construct the new RowResolver for the new TableScanOperator
+    RowResolver rr = new RowResolver();
+    try {
+      StructObjectInspector rowObjectInspector = (StructObjectInspector) indexTableHandle
+          .getDeserializer().getObjectInspector();
+      StructField field = rowObjectInspector.getStructFieldRef(rewriteQueryCtx.getIndexKey());
+      rr.put(indexTableName, field.getFieldName(), new ColumnInfo(field.getFieldName(),
+          TypeInfoUtils.getTypeInfoFromObjectInspector(field.getFieldObjectInspector()),
+          indexTableName, false));
+    } catch (SerDeException e) {
+      LOG.error("Error while creating the RowResolver for new TableScanOperator.");
+      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
+      throw new SemanticException(e.getMessage(), e);
+    }
+
+    // Set row resolver for new table
+    operatorContext.setRowResolver(rr);
+
+    String newAlias = indexTableName;
+    int index = alias.lastIndexOf(":");
+    if (index >= 0) {
+      newAlias = alias.substring(0, index) + ":" + indexTableName;
+    }
+
+    // Scan operator now points to other table
+    topToTable.put(scanOperator, indexTableHandle);
+    scanOperator.getConf().setAlias(newAlias);
+    scanOperator.setAlias(indexTableName);
+    topOps.put(newAlias, scanOperator);
+    opParseContext.put(scanOperator, operatorContext);
+    rewriteQueryCtx.getParseContext().setTopToTable((HashMap<TableScanOperator, Table>) topToTable);
+    rewriteQueryCtx.getParseContext().setTopOps(
+        (HashMap<String, Operator<? extends OperatorDesc>>) topOps);
+    rewriteQueryCtx.getParseContext().setOpParseCtx(
+        (LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext>) opParseContext);
+
+    ColumnPrunerProcFactory.setupNeededColumns(scanOperator, rr,
+        Arrays.asList(rewriteQueryCtx.getIndexKey()));
+  }
+
+  /**
+   * This method replaces the original SelectOperator with the new
+   * SelectOperator with a new column indexed_key_column.
+   */
+  private void replaceSelectOperatorProcess(SelectOperator operator) throws SemanticException {
+    RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = this;
+    // we need to set the colList, outputColumnNames, colExprMap,
+    // rowSchema for only that SelectOperator which precedes the GroupByOperator
+    // count(indexed_key_column) needs to be replaced by
+    // sum(`_count_of_indexed_key_column`)
+    List<ExprNodeDesc> selColList = operator.getConf().getColList();
+    selColList.add(rewriteQueryCtx.getAggrExprNode());
+
+    List<String> selOutputColNames = operator.getConf().getOutputColumnNames();
+    selOutputColNames.add(rewriteQueryCtx.getAggrExprNode().getColumn());
+
+    operator.getColumnExprMap().put(rewriteQueryCtx.getAggrExprNode().getColumn(),
+        rewriteQueryCtx.getAggrExprNode());
+
+    RowSchema selRS = operator.getSchema();
+    List<ColumnInfo> selRSSignature = selRS.getSignature();
+    // Need to create a new type for Column[_count_of_indexed_key_column] node
+    PrimitiveTypeInfo pti = TypeInfoFactory.getPrimitiveTypeInfo("bigint");
+    pti.setTypeName("bigint");
+    ColumnInfo newCI = new ColumnInfo(rewriteQueryCtx.getAggregateFunction(), pti, "", false);
+    selRSSignature.add(newCI);
+    selRS.setSignature((ArrayList<ColumnInfo>) selRSSignature);
+    operator.setSchema(selRS);
+  }
+
+  /**
+   * We need to replace the count(indexed_column_key) GenericUDAF aggregation
+   * function for group-by construct to "sum" GenericUDAF. This method creates a
+   * new operator tree for a sample query that creates a GroupByOperator with
+   * sum aggregation function and uses that GroupByOperator information to
+   * replace the original GroupByOperator aggregation information. It replaces
+   * the AggregationDesc (aggregation descriptor) of the old GroupByOperator
+   * with the new Aggregation Desc of the new GroupByOperator.
+   * @return
+   */
+  private void replaceGroupByOperatorProcess(GroupByOperator operator, int index)
+      throws SemanticException {
+    RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = this;
+
+    // We need to replace the GroupByOperator which is before RS
+    if (index == 0) {
+      // the query contains the sum aggregation GenericUDAF
+      String selReplacementCommand = "select sum(`" + rewriteQueryCtx.getAggregateFunction() + "`)"
+          + " from " + rewriteQueryCtx.getIndexName() + " group by "
+          + rewriteQueryCtx.getIndexKey() + " ";
+      // create a new ParseContext for the query to retrieve its operator tree,
+      // and the required GroupByOperator from it
+      ParseContext newDAGContext = RewriteParseContextGenerator.generateOperatorTree(
+          rewriteQueryCtx.getParseContext().getConf(), selReplacementCommand);
+
+      // we get our new GroupByOperator here
+      Map<GroupByOperator, Set<String>> newGbyOpMap = newDAGContext.getGroupOpToInputTables();
+      GroupByOperator newGbyOperator = newGbyOpMap.keySet().iterator().next();
+      GroupByDesc oldConf = operator.getConf();
+
+      // we need this information to set the correct colList, outputColumnNames
+      // in SelectOperator
+      ExprNodeColumnDesc aggrExprNode = null;
+
+      // Construct the new AggregationDesc to get rid of the current
+      // internal names and replace them with new internal names
+      // as required by the operator tree
+      GroupByDesc newConf = newGbyOperator.getConf();
+      List<AggregationDesc> newAggrList = newConf.getAggregators();
+      if (newAggrList != null && newAggrList.size() > 0) {
+        for (AggregationDesc aggregationDesc : newAggrList) {
+          rewriteQueryCtx.setEval(aggregationDesc.getGenericUDAFEvaluator());
+          aggrExprNode = (ExprNodeColumnDesc) aggregationDesc.getParameters().get(0);
+          rewriteQueryCtx.setAggrExprNode(aggrExprNode);
+        }
+      }
+
+      // Now the GroupByOperator has the new AggregationList;
+      // sum(`_count_of_indexed_key`)
+      // instead of count(indexed_key)
+      OpParseContext gbyOPC = rewriteQueryCtx.getOpc().get(operator);
+      RowResolver gbyRR = newDAGContext.getOpParseCtx().get(newGbyOperator).getRowResolver();
+      gbyOPC.setRowResolver(gbyRR);
+      rewriteQueryCtx.getOpc().put(operator, gbyOPC);
+
+      oldConf.setAggregators((ArrayList<AggregationDesc>) newAggrList);
+      operator.setConf(oldConf);
+
+    } else {
+      // we just need to reset the GenericUDAFEvaluator and its name for this
+      // GroupByOperator whose parent is the ReduceSinkOperator
+      GroupByDesc childConf = (GroupByDesc) operator.getConf();
+      List<AggregationDesc> childAggrList = childConf.getAggregators();
+      if (childAggrList != null && childAggrList.size() > 0) {
+        for (AggregationDesc aggregationDesc : childAggrList) {
+          List<ExprNodeDesc> paraList = aggregationDesc.getParameters();
+          List<ObjectInspector> parametersOIList = new ArrayList<ObjectInspector>();
+          for (ExprNodeDesc expr : paraList) {
+            parametersOIList.add(expr.getWritableObjectInspector());
+          }
+          GenericUDAFEvaluator evaluator = FunctionRegistry.getGenericUDAFEvaluator("sum",
+              parametersOIList, false, false);
+          aggregationDesc.setGenericUDAFEvaluator(evaluator);
+          aggregationDesc.setGenericUDAFName("sum");
+        }
+      }
+    }
   }
 }

Added: hive/trunk/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo_1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo_1.q?rev=1643732&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo_1.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo_1.q Mon Dec  8 04:42:59 2014
@@ -0,0 +1,173 @@
+set hive.stats.dbclass=fs;
+set hive.stats.autogather=true;
+set hive.cbo.enable=true;
+
+DROP TABLE IF EXISTS lineitem_ix;
+CREATE TABLE lineitem_ix (L_ORDERKEY      INT,
+                                L_PARTKEY       INT,
+                                L_SUPPKEY       INT,
+                                L_LINENUMBER    INT,
+                                L_QUANTITY      DOUBLE,
+                                L_EXTENDEDPRICE DOUBLE,
+                                L_DISCOUNT      DOUBLE,
+                                L_TAX           DOUBLE,
+                                L_RETURNFLAG    STRING,
+                                L_LINESTATUS    STRING,
+                                l_shipdate      STRING,
+                                L_COMMITDATE    STRING,
+                                L_RECEIPTDATE   STRING,
+                                L_SHIPINSTRUCT  STRING,
+                                L_SHIPMODE      STRING,
+                                L_COMMENT       STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|';
+
+LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_ix;
+
+CREATE INDEX lineitem_ix_lshipdate_idx ON TABLE lineitem_ix(l_shipdate) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(l_shipdate)");
+ALTER INDEX lineitem_ix_lshipdate_idx ON lineitem_ix REBUILD;
+
+explain select l_shipdate, count(l_shipdate)
+from lineitem_ix
+group by l_shipdate;
+
+select l_shipdate, count(l_shipdate)
+from lineitem_ix
+group by l_shipdate
+order by l_shipdate;
+
+set hive.optimize.index.groupby=true;
+
+explain select l_shipdate, count(l_shipdate)
+from lineitem_ix
+group by l_shipdate;
+
+select l_shipdate, count(l_shipdate)
+from lineitem_ix
+group by l_shipdate
+order by l_shipdate;
+
+set hive.optimize.index.groupby=false;
+
+
+explain select year(l_shipdate) as year,
+        month(l_shipdate) as month,
+        count(l_shipdate) as monthly_shipments
+from lineitem_ix
+group by year(l_shipdate), month(l_shipdate)
+order by year, month;
+
+select year(l_shipdate) as year,
+        month(l_shipdate) as month,
+        count(l_shipdate) as monthly_shipments
+from lineitem_ix
+group by year(l_shipdate), month(l_shipdate)
+order by year, month;
+
+set hive.optimize.index.groupby=true;
+
+explain select year(l_shipdate) as year,
+        month(l_shipdate) as month,
+        count(l_shipdate) as monthly_shipments
+from lineitem_ix
+group by year(l_shipdate), month(l_shipdate)
+order by year, month;
+
+select year(l_shipdate) as year,
+        month(l_shipdate) as month,
+        count(l_shipdate) as monthly_shipments
+from lineitem_ix
+group by year(l_shipdate), month(l_shipdate)
+order by year, month;
+
+explain select lastyear.month,
+        thisyear.month,
+        (thisyear.monthly_shipments - lastyear.monthly_shipments) /
+lastyear.monthly_shipments as monthly_shipments_delta
+   from (select year(l_shipdate) as year,
+                month(l_shipdate) as month,
+                count(l_shipdate) as monthly_shipments
+           from lineitem_ix
+          where year(l_shipdate) = 1997
+          group by year(l_shipdate), month(l_shipdate)
+        )  lastyear join
+        (select year(l_shipdate) as year,
+                month(l_shipdate) as month,
+                count(l_shipdate) as monthly_shipments
+           from lineitem_ix
+          where year(l_shipdate) = 1998
+          group by year(l_shipdate), month(l_shipdate)
+        )  thisyear
+  on lastyear.month = thisyear.month;
+
+explain  select l_shipdate, cnt
+from (select l_shipdate, count(l_shipdate) as cnt from lineitem_ix group by l_shipdate
+union all
+select l_shipdate, l_orderkey as cnt
+from lineitem_ix) dummy;
+
+CREATE TABLE tbl(key int, value int);
+CREATE INDEX tbl_key_idx ON TABLE tbl(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)");
+ALTER INDEX tbl_key_idx ON tbl REBUILD;
+
+EXPLAIN select key, count(key) from tbl where key = 1 group by key;
+EXPLAIN select key, count(key) from tbl group by key;
+
+EXPLAIN select count(1) from tbl;
+EXPLAIN select count(key) from tbl;
+
+EXPLAIN select key FROM tbl GROUP BY key;
+EXPLAIN select key FROM tbl GROUP BY value, key;
+EXPLAIN select key FROM tbl WHERE key = 3 GROUP BY key;
+EXPLAIN select key FROM tbl WHERE value = 2 GROUP BY key;
+EXPLAIN select key FROM tbl GROUP BY key, substr(key,2,3);
+
+EXPLAIN select key, value FROM tbl GROUP BY value, key;
+EXPLAIN select key, value FROM tbl WHERE value = 1 GROUP BY key, value;
+
+EXPLAIN select DISTINCT key FROM tbl;
+EXPLAIN select DISTINCT key FROM tbl;
+EXPLAIN select DISTINCT key FROM tbl;
+EXPLAIN select DISTINCT key, value FROM tbl;
+EXPLAIN select DISTINCT key, value FROM tbl WHERE value = 2;
+EXPLAIN select DISTINCT key, value FROM tbl WHERE value = 2 AND key = 3;
+EXPLAIN select DISTINCT key, value FROM tbl WHERE value = key;
+EXPLAIN select DISTINCT key, substr(value,2,3) FROM tbl WHERE value = key;
+EXPLAIN select DISTINCT key, substr(value,2,3) FROM tbl;
+
+EXPLAIN select * FROM (select DISTINCT key, value FROM tbl) v1 WHERE v1.value = 2;
+
+DROP TABLE tbl;
+
+CREATE TABLE tblpart (key int, value string) PARTITIONED BY (ds string, hr int);
+INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-08', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 11;
+INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-08', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-08' AND hr = 12;
+INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-09', hr=11) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 11;
+INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-09', hr=12) SELECT key, value FROM srcpart WHERE ds = '2008-04-09' AND hr = 12;
+
+CREATE INDEX tbl_part_index ON TABLE tblpart(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)");
+
+ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-08', hr=11) REBUILD;
+EXPLAIN SELECT key, count(key) FROM tblpart WHERE ds='2008-04-09' AND hr=12 AND key < 10 GROUP BY key;
+
+ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-08', hr=12) REBUILD;
+ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-09', hr=11) REBUILD;
+ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-09', hr=12) REBUILD;
+EXPLAIN SELECT key, count(key) FROM tblpart WHERE ds='2008-04-09' AND hr=12 AND key < 10 GROUP BY key;
+
+DROP INDEX tbl_part_index on tblpart;
+DROP TABLE tblpart;
+
+CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'; 
+LOAD DATA LOCAL INPATH '../../data/files/tbl.txt' OVERWRITE INTO TABLE tbl;
+
+CREATE INDEX tbl_key_idx ON TABLE tbl(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)");
+ALTER INDEX tbl_key_idx ON tbl REBUILD;
+
+set hive.optimize.index.groupby=false;
+explain select key, count(key) from tbl group by key order by key;
+select key, count(key) from tbl group by key order by key;
+set hive.optimize.index.groupby=true;
+explain select key, count(key) from tbl group by key order by key;
+select key, count(key) from tbl group by key order by key;
+DROP TABLE tbl;
\ No newline at end of file

Added: hive/trunk/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo_2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo_2.q?rev=1643732&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo_2.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx_cbo_2.q Mon Dec  8 04:42:59 2014
@@ -0,0 +1,392 @@
+set hive.stats.dbclass=fs;
+set hive.stats.autogather=true;
+set hive.cbo.enable=true;
+set hive.optimize.index.groupby=true;
+
+DROP TABLE IF EXISTS lineitem_ix;
+DROP INDEX IF EXISTS lineitem_ix_L_ORDERKEY_idx on lineitem_ix;
+DROP INDEX IF EXISTS lineitem_ix_L_PARTKEY_idx on lineitem_ix;
+
+
+CREATE TABLE lineitem_ix (L_ORDERKEY      INT,
+                                L_PARTKEY       INT,
+                                L_SUPPKEY       INT,
+                                L_LINENUMBER    INT,
+                                L_QUANTITY      DOUBLE,
+                                L_EXTENDEDPRICE DOUBLE,
+                                L_DISCOUNT      DOUBLE,
+                                L_TAX           DOUBLE,
+                                L_RETURNFLAG    STRING,
+                                L_LINESTATUS    STRING,
+                                l_shipdate      STRING,
+                                L_COMMITDATE    STRING,
+                                L_RECEIPTDATE   STRING,
+                                L_SHIPINSTRUCT  STRING,
+                                L_SHIPMODE      STRING,
+                                L_COMMENT       STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|';
+
+LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_ix;
+
+CREATE INDEX lineitem_ix_L_ORDERKEY_idx ON TABLE lineitem_ix(L_ORDERKEY) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(L_ORDERKEY)");
+ALTER INDEX lineitem_ix_L_ORDERKEY_idx ON lineitem_ix REBUILD;
+
+CREATE INDEX lineitem_ix_L_PARTKEY_idx ON TABLE lineitem_ix(L_PARTKEY) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(L_PARTKEY)");
+ALTER INDEX lineitem_ix_L_PARTKEY_idx ON lineitem_ix REBUILD;
+
+explain
+select count(1)
+from lineitem_ix;
+
+select count(1)
+from lineitem_ix;
+
+explain
+select count(L_ORDERKEY)
+from lineitem_ix;
+
+select count(L_ORDERKEY)
+from lineitem_ix;
+
+explain select L_ORDERKEY+L_PARTKEY as keysum,
+count(L_ORDERKEY), count(L_PARTKEY)
+from lineitem_ix
+group by L_ORDERKEY, L_PARTKEY;
+
+select L_ORDERKEY+L_PARTKEY as keysum,
+count(L_ORDERKEY), count(L_PARTKEY)
+from lineitem_ix
+group by L_ORDERKEY, L_PARTKEY;
+
+explain
+select L_ORDERKEY, count(L_ORDERKEY)
+from  lineitem_ix
+where L_ORDERKEY = 7
+group by L_ORDERKEY;
+
+select L_ORDERKEY, count(L_ORDERKEY)
+from  lineitem_ix
+where L_ORDERKEY = 7
+group by L_ORDERKEY;
+
+explain
+select L_ORDERKEY, count(1)
+from lineitem_ix
+group by L_ORDERKEY;
+
+select L_ORDERKEY, count(1)
+from lineitem_ix
+group by L_ORDERKEY;
+
+explain
+select count(L_ORDERKEY+1)
+from lineitem_ix;
+
+select count(L_ORDERKEY+1)
+from lineitem_ix;
+
+explain
+select L_ORDERKEY, count(L_ORDERKEY+1)
+from lineitem_ix
+group by L_ORDERKEY;
+
+select L_ORDERKEY, count(L_ORDERKEY+1)
+from lineitem_ix
+group by L_ORDERKEY;
+
+explain
+select L_ORDERKEY, count(L_ORDERKEY+1+L_ORDERKEY+2)
+from lineitem_ix
+group by L_ORDERKEY;
+
+select L_ORDERKEY, count(L_ORDERKEY+1+L_ORDERKEY+2)
+from lineitem_ix
+group by L_ORDERKEY;
+
+explain
+select L_ORDERKEY, count(1+L_ORDERKEY+2)
+from lineitem_ix
+group by L_ORDERKEY;
+
+select L_ORDERKEY, count(1+L_ORDERKEY+2)
+from lineitem_ix
+group by L_ORDERKEY;
+
+
+explain
+select L_ORDERKEY as a, count(1) as b
+from lineitem_ix
+where L_ORDERKEY < 7
+group by L_ORDERKEY;
+
+select L_ORDERKEY as a, count(1) as b
+from lineitem_ix
+where L_ORDERKEY < 7
+group by L_ORDERKEY;
+
+explain
+select L_ORDERKEY, count(keysum), sum(keysum)
+from
+(select L_ORDERKEY, L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA
+group by L_ORDERKEY;
+
+select L_ORDERKEY, count(keysum), sum(keysum)
+from
+(select L_ORDERKEY, L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA
+group by L_ORDERKEY;
+
+
+explain
+select L_ORDERKEY, count(L_ORDERKEY), sum(L_ORDERKEY)
+from lineitem_ix
+group by L_ORDERKEY;
+
+select L_ORDERKEY, count(L_ORDERKEY), sum(L_ORDERKEY)
+from lineitem_ix
+group by L_ORDERKEY;
+
+explain
+select colA, count(colA)
+from (select L_ORDERKEY as colA from lineitem_ix) tabA
+group by colA;
+
+select colA, count(colA)
+from (select L_ORDERKEY as colA from lineitem_ix) tabA
+group by colA;
+
+explain
+select keysum, count(keysum)
+from
+(select L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA
+group by keysum;
+
+select keysum, count(keysum)
+from
+(select L_ORDERKEY+L_PARTKEY as keysum from lineitem_ix) tabA
+group by keysum;
+
+explain
+select keysum, count(keysum)
+from
+(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA
+group by keysum;
+
+select keysum, count(keysum)
+from
+(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA
+group by keysum;
+
+
+explain
+select keysum, count(1)
+from
+(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA
+group by keysum;
+
+select keysum, count(1)
+from
+(select L_ORDERKEY+1 as keysum from lineitem_ix) tabA
+group by keysum;
+
+
+explain
+select keysum, count(keysum)
+from
+(select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA
+group by keysum;
+
+select keysum, count(keysum)
+from
+(select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA
+group by keysum;
+
+
+explain
+select ckeysum, count(ckeysum)
+from
+(select keysum, count(keysum) as ckeysum
+from 
+	(select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA
+group by keysum) tabB
+group by ckeysum;
+
+select ckeysum, count(ckeysum)
+from
+(select keysum, count(keysum) as ckeysum
+from 
+	(select L_ORDERKEY+1 as keysum from lineitem_ix where L_ORDERKEY = 7) tabA
+group by keysum) tabB
+group by ckeysum;
+
+explain
+select keysum, count(keysum) as ckeysum
+from
+(select L_ORDERKEY, count(L_ORDERKEY) as keysum
+from lineitem_ix
+where L_ORDERKEY < 7
+group by L_ORDERKEY)tabA
+group by keysum;
+
+select keysum, count(keysum) as ckeysum
+from
+(select L_ORDERKEY, count(L_ORDERKEY) as keysum
+from lineitem_ix
+where L_ORDERKEY < 7
+group by L_ORDERKEY)tabA
+group by keysum;
+
+
+DROP INDEX IF EXISTS src_key_idx on src;
+CREATE INDEX src_key_idx ON TABLE src(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)");
+ALTER INDEX src_key_idx ON src REBUILD;
+
+explain
+select tabA.a, tabA.b, tabB.a, tabB.b
+from
+(select L_ORDERKEY as a, count(L_ORDERKEY) as b
+from lineitem_ix
+where L_ORDERKEY < 7
+group by L_ORDERKEY) tabA
+join
+(select key as a, count(key) as b
+from src
+group by key
+) tabB
+on (tabA.b=tabB.b);
+
+select tabA.a, tabA.b, tabB.a, tabB.b
+from
+(select L_ORDERKEY as a, count(L_ORDERKEY) as b
+from lineitem_ix
+where L_ORDERKEY < 7
+group by L_ORDERKEY) tabA
+join
+(select key as a, count(key) as b
+from src
+group by key
+) tabB
+on (tabA.b=tabB.b);
+
+
+explain
+select tabA.a, tabA.b, tabB.a, tabB.b
+from
+(select L_ORDERKEY as a, count(L_ORDERKEY) as b
+from lineitem_ix
+where L_ORDERKEY < 7
+group by L_ORDERKEY) tabA
+join
+(select key as a, count(key) as b
+from src
+group by key
+) tabB
+on (tabA.b=tabB.b and tabB.a < '2');
+
+select tabA.a, tabA.b, tabB.a, tabB.b
+from 
+(select L_ORDERKEY as a, count(L_ORDERKEY) as b
+from lineitem_ix
+where L_ORDERKEY < 7
+group by L_ORDERKEY) tabA
+join
+(select key as a, count(key) as b
+from src
+group by key
+) tabB
+on (tabA.b=tabB.b and tabB.a < '2');
+
+EXPLAIN
+select L_ORDERKEY FROM lineitem_ix GROUP BY L_ORDERKEY, L_ORDERKEY+1;
+
+select L_ORDERKEY FROM lineitem_ix GROUP BY L_ORDERKEY, L_ORDERKEY+1;
+
+EXPLAIN
+select L_ORDERKEY, L_ORDERKEY+1, count(L_ORDERKEY) FROM lineitem_ix GROUP BY L_ORDERKEY, L_ORDERKEY+1;
+
+select L_ORDERKEY, L_ORDERKEY+1, count(L_ORDERKEY) FROM lineitem_ix GROUP BY L_ORDERKEY, L_ORDERKEY+1;
+
+EXPLAIN
+select L_ORDERKEY+2, count(L_ORDERKEY) FROM lineitem_ix GROUP BY L_ORDERKEY+2;
+
+select L_ORDERKEY+2, count(L_ORDERKEY) FROM lineitem_ix GROUP BY L_ORDERKEY+2;
+
+--with cbo on, the following query can use idx
+
+explain
+select b, count(b) as ckeysum
+from
+(
+select L_ORDERKEY as a, count(L_ORDERKEY) as b
+from lineitem_ix
+where L_ORDERKEY < 7
+group by L_ORDERKEY
+union all
+select L_PARTKEY as a, count(L_PARTKEY) as b
+from lineitem_ix
+where L_PARTKEY < 10
+group by L_PARTKEY
+) tabA
+group by b;
+
+select b, count(b) as ckeysum
+from
+(
+select L_ORDERKEY as a, count(L_ORDERKEY) as b
+from lineitem_ix
+where L_ORDERKEY < 7
+group by L_ORDERKEY
+union all
+select L_PARTKEY as a, count(L_PARTKEY) as b
+from lineitem_ix 
+where L_PARTKEY < 10
+group by L_PARTKEY
+) tabA
+group by b;
+
+--with cbo on, the following query can not use idx because AggFunc is empty here
+
+explain
+select a, count(a) as ckeysum
+from
+(
+select L_ORDERKEY as a, count(L_ORDERKEY) as b 
+from lineitem_ix
+where L_ORDERKEY < 7 
+group by L_ORDERKEY
+union all
+select L_PARTKEY as a, count(L_PARTKEY) as b
+from lineitem_ix
+where L_PARTKEY < 10
+group by L_PARTKEY
+) tabA
+group by a;
+
+select a, count(a) as ckeysum
+from
+(
+select L_ORDERKEY as a, count(L_ORDERKEY) as b
+from lineitem_ix
+where L_ORDERKEY < 7
+group by L_ORDERKEY
+union all
+select L_PARTKEY as a, count(L_PARTKEY) as b
+from lineitem_ix 
+where L_PARTKEY < 10
+group by L_PARTKEY
+) tabA
+group by a;
+
+explain
+select a, count(a)
+from (
+select case L_ORDERKEY when null then 1 else 1 END as a
+from lineitem_ix)tab
+group by a;
+
+select a, count(a)
+from (
+select case L_ORDERKEY when null then 1 else 1 END as a
+from lineitem_ix)tab
+group by a;
+

Modified: hive/trunk/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out?rev=1643732&r1=1643731&r2=1643732&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out Mon Dec  8 04:42:59 2014
@@ -1189,14 +1189,14 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: tbl
+            alias: default.default__tbl_tbl_key_idx__
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Select Operator
-              expressions: key (type: int)
-              outputColumnNames: key
+              expressions: key (type: int), _count_of_key (type: bigint)
+              outputColumnNames: key, _count_of_key
               Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
               Group By Operator
-                aggregations: count(key)
+                aggregations: sum(_count_of_key)
                 mode: hash
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -1206,7 +1206,7 @@ STAGE PLANS:
                   value expressions: _col0 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
-          aggregations: count(VALUE._col0)
+          aggregations: sum(VALUE._col0)
           mode: mergepartial
           outputColumnNames: _col0
           Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE



Mime
View raw message