hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From j..@apache.org
Subject svn commit: r1170007 [2/3] - in /hive/trunk: common/src/java/org/apache/hadoop/hive/conf/ data/files/ ql/src/java/org/apache/hadoop/hive/ql/index/ ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/ ql/src/java/org/apache/hadoop/hive/ql/index/compact/ ...
Date Tue, 13 Sep 2011 02:20:53 GMT
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java?rev=1170007&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java
(added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java
Tue Sep 13 02:20:52 2011
@@ -0,0 +1,126 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.index;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Context;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.ParseDriver;
+import org.apache.hadoop.hive.ql.parse.ParseException;
+import org.apache.hadoop.hive.ql.parse.ParseUtils;
+import org.apache.hadoop.hive.ql.parse.QB;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+
+/**
+ * RewriteParseContextGenerator is a class that offers methods to generate operator tree
+ * for input queries. It is implemented on lines of the analyzeInternal(..) method
+ * of {@link SemanticAnalyzer} but it creates only the ParseContext for the input query command.
+ * It does not optimize or generate map-reduce tasks for the input query.
+ * This can be used when you need to create operator tree for an internal query.
+ *
+ */
+public final class RewriteParseContextGenerator {
+  private static final Log LOG = LogFactory.getLog(RewriteParseContextGenerator.class.getName());
+
+  private RewriteParseContextGenerator(){
+  }
+
+  /**
+   * Parse the input {@link String} command and generate a ASTNode tree.
+   * @param conf
+   * @param command
+   * @return
+   * @throws SemanticException
+   */
+  public static ParseContext generateOperatorTree(HiveConf conf,
+      String command) throws SemanticException{
+    Context ctx;
+    ParseContext subPCtx = null;
+    try {
+      ctx = new Context(conf);
+      ParseDriver pd = new ParseDriver();
+      ASTNode tree = pd.parse(command, ctx);
+      tree = ParseUtils.findRootNonNullToken(tree);
+
+      BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, tree);
+      assert(sem instanceof SemanticAnalyzer);
+      doSemanticAnalysis((SemanticAnalyzer) sem, tree, ctx);
+
+      subPCtx = ((SemanticAnalyzer) sem).getParseContext();
+      LOG.info("Sub-query Semantic Analysis Completed");
+    } catch (IOException e) {
+      LOG.error("IOException in generating the operator " +
+        "tree for input command - " + command + " " , e);
+      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
+      throw new SemanticException(e.getMessage(), e);
+    } catch (ParseException e) {
+      LOG.error("ParseException in generating the operator " +
+        "tree for input command - " + command + " " , e);
+      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
+      throw new SemanticException(e.getMessage(), e);
+    } catch (SemanticException e) {
+      LOG.error("SemanticException in generating the operator " +
+        "tree for input command - " + command + " " , e);
+      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
+      throw new SemanticException(e.getMessage(), e);
+    }
+    return subPCtx;
+
+  }
+
+  /**
+   * For the input ASTNode tree, perform a semantic analysis and check metadata
+   * Generate a operator tree and return the {@link ParseContext} instance for the operator
tree.
+   *
+   * @param ctx
+   * @param sem
+   * @param ast
+   * @return
+   * @throws SemanticException
+   */
+  private static void doSemanticAnalysis(SemanticAnalyzer sem,
+      ASTNode ast, Context ctx) throws SemanticException {
+    QB qb = new QB(null, null, false);
+    ASTNode child = ast;
+    ParseContext subPCtx = ((SemanticAnalyzer) sem).getParseContext();
+    subPCtx.setContext(ctx);
+    ((SemanticAnalyzer) sem).init(subPCtx);
+
+    LOG.info("Starting Sub-query Semantic Analysis");
+    sem.doPhase1(child, qb, sem.initPhase1Ctx());
+    LOG.info("Completed phase 1 of Sub-query Semantic Analysis");
+
+    sem.getMetaData(qb);
+    LOG.info("Completed getting MetaData in Sub-query Semantic Analysis");
+
+    LOG.info("Sub-query Abstract syntax tree: " + ast.toStringTree());
+    sem.genPlan(qb);
+
+    LOG.info("Sub-query Completed plan generation");
+  }
+
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndex.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndex.java?rev=1170007&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndex.java
(added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndex.java
Tue Sep 13 02:20:52 2011
@@ -0,0 +1,322 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.index;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Stack;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.parse.OpParseContext;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.RowResolver;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+
+/**
+ * This class defines a procedure factory used to rewrite the operator plan
+ * Each method replaces the necessary base table data structures with
+ * the index table data structures for each operator.
+ */
+public final class RewriteQueryUsingAggregateIndex {
+  private static final Log LOG = LogFactory.getLog(RewriteQueryUsingAggregateIndex.class.getName());
+  private static RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = null;
+
+  private RewriteQueryUsingAggregateIndex() {
+    //this prevents the class from getting instantiated
+  }
+
+  private static class NewQuerySelectSchemaProc implements NodeProcessor {
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
+        Object... nodeOutputs) throws SemanticException {
+      SelectOperator operator = (SelectOperator)nd;
+      rewriteQueryCtx = (RewriteQueryUsingAggregateIndexCtx)ctx;
+      List<Operator<? extends Serializable>> childOps = operator.getChildOperators();
+      Operator<? extends Serializable> childOp = childOps.iterator().next();
+
+      //we need to set the colList, outputColumnNames, colExprMap,
+      // rowSchema for only that SelectOperator which precedes the GroupByOperator
+      // count(indexed_key_column) needs to be replaced by sum(`_count_Of_indexed_key_column`)
+      if (childOp instanceof GroupByOperator){
+        List<ExprNodeDesc> selColList =
+          operator.getConf().getColList();
+        selColList.add(rewriteQueryCtx.getAggrExprNode());
+
+        List<String> selOutputColNames =
+          operator.getConf().getOutputColumnNames();
+        selOutputColNames.add(rewriteQueryCtx.getAggrExprNode().getColumn());
+
+        RowSchema selRS = operator.getSchema();
+        List<ColumnInfo> selRSSignature =
+          selRS.getSignature();
+        //Need to create a new type for Column[_count_Of_indexed_key_column] node
+        PrimitiveTypeInfo pti = (PrimitiveTypeInfo) TypeInfoFactory.getPrimitiveTypeInfo("bigint");
+        pti.setTypeName("bigint");
+        ColumnInfo newCI = new ColumnInfo(rewriteQueryCtx.getAggregateFunction(), pti, "",
false);
+        selRSSignature.add(newCI);
+        selRS.setSignature((ArrayList<ColumnInfo>) selRSSignature);
+        operator.setSchema(selRS);
+      }
+      return null;
+    }
+  }
+
+    public static NewQuerySelectSchemaProc getNewQuerySelectSchemaProc(){
+      return new NewQuerySelectSchemaProc();
+  }
+
+
+  /**
+   * This processor replaces the original TableScanOperator with
+   * the new TableScanOperator and metadata that scans over the
+   * index table rather than scanning over the orginal table.
+   *
+   */
+  private static class ReplaceTableScanOpProc implements NodeProcessor {
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
+        Object... nodeOutputs) throws SemanticException {
+      TableScanOperator scanOperator = (TableScanOperator)nd;
+      rewriteQueryCtx = (RewriteQueryUsingAggregateIndexCtx)ctx;
+      String baseTableName = rewriteQueryCtx.getBaseTableName();
+      String alias = null;
+      if(baseTableName.contains(":")){
+        alias = (baseTableName.split(":"))[0];
+      }
+
+      //Need to remove the original TableScanOperators from these data structures
+      // and add new ones
+      Map<TableScanOperator, Table>  topToTable =
+        rewriteQueryCtx.getParseContext().getTopToTable();
+      Map<String, Operator<? extends Serializable>>  topOps =
+        rewriteQueryCtx.getParseContext().getTopOps();
+      Map<Operator<? extends Serializable>, OpParseContext>  opParseContext =
+        rewriteQueryCtx.getParseContext().getOpParseCtx();
+
+      //need this to set rowResolver for new scanOperator
+      OpParseContext operatorContext = opParseContext.get(scanOperator);
+
+      //remove original TableScanOperator
+      topToTable.remove(scanOperator);
+      topOps.remove(baseTableName);
+      opParseContext.remove(scanOperator);
+
+      //construct a new descriptor for the index table scan
+      TableScanDesc indexTableScanDesc = new TableScanDesc();
+      indexTableScanDesc.setGatherStats(false);
+
+      String indexTableName = rewriteQueryCtx.getIndexName();
+      Table indexTableHandle = null;
+      try {
+        indexTableHandle = rewriteQueryCtx.getHiveDb().getTable(indexTableName);
+      } catch (HiveException e) {
+        LOG.error("Error while getting the table handle for index table.");
+        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
+        throw new SemanticException(e.getMessage(), e);
+      }
+
+      String k = indexTableName + Path.SEPARATOR;
+      indexTableScanDesc.setStatsAggPrefix(k);
+      scanOperator.setConf(indexTableScanDesc);
+
+      //Construct the new RowResolver for the new TableScanOperator
+      RowResolver rr = new RowResolver();
+      try {
+        StructObjectInspector rowObjectInspector =
+          (StructObjectInspector) indexTableHandle.getDeserializer().getObjectInspector();
+        List<? extends StructField> fields = rowObjectInspector
+        .getAllStructFieldRefs();
+        for (int i = 0; i < fields.size(); i++) {
+          rr.put(indexTableName, fields.get(i).getFieldName(), new ColumnInfo(fields
+              .get(i).getFieldName(), TypeInfoUtils
+              .getTypeInfoFromObjectInspector(fields.get(i)
+                  .getFieldObjectInspector()), indexTableName, false));
+        }
+      } catch (SerDeException e) {
+        LOG.error("Error while creating the RowResolver for new TableScanOperator.");
+        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
+        throw new SemanticException(e.getMessage(), e);
+      }
+
+      //Set row resolver for new table
+      operatorContext.setRowResolver(rr);
+      String tabNameWithAlias = null;
+      if(alias != null){
+        tabNameWithAlias = alias + ":" + indexTableName;
+       }else{
+         tabNameWithAlias = indexTableName;
+       }
+
+      //Scan operator now points to other table
+      topToTable.put(scanOperator, indexTableHandle);
+      scanOperator.getConf().setAlias(tabNameWithAlias);
+      scanOperator.setAlias(indexTableName);
+      topOps.put(tabNameWithAlias, scanOperator);
+      opParseContext.put(scanOperator, operatorContext);
+      rewriteQueryCtx.getParseContext().setTopToTable(
+          (HashMap<TableScanOperator, Table>) topToTable);
+      rewriteQueryCtx.getParseContext().setTopOps(
+          (HashMap<String, Operator<? extends Serializable>>) topOps);
+      rewriteQueryCtx.getParseContext().setOpParseCtx(
+          (LinkedHashMap<Operator<? extends Serializable>, OpParseContext>) opParseContext);
+
+      return null;
+    }
+  }
+
+  public static ReplaceTableScanOpProc getReplaceTableScanProc(){
+    return new ReplaceTableScanOpProc();
+  }
+
+  /**
+   * We need to replace the count(indexed_column_key) GenericUDAF aggregation function for
+   * group-by construct to "sum" GenericUDAF.
+   * This processor creates a new operator tree for a sample query that creates a GroupByOperator
+   * with sum aggregation function and uses that GroupByOperator information to replace
+   * the original GroupByOperator aggregation information.
+   * It replaces the AggregationDesc (aggregation descriptor) of the old GroupByOperator
with the
+   * new Aggregation Desc of the new GroupByOperator.
+   */
+  private static class NewQueryGroupbySchemaProc implements NodeProcessor {
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
+        Object... nodeOutputs) throws SemanticException {
+      GroupByOperator operator = (GroupByOperator)nd;
+      rewriteQueryCtx = (RewriteQueryUsingAggregateIndexCtx)ctx;
+
+      //We need to replace the GroupByOperator which is in
+      //groupOpToInputTables map with the new GroupByOperator
+      if(rewriteQueryCtx.getParseContext().getGroupOpToInputTables().containsKey(operator)){
+        List<ExprNodeDesc> gbyKeyList = operator.getConf().getKeys();
+        String gbyKeys = null;
+        Iterator<ExprNodeDesc> gbyKeyListItr = gbyKeyList.iterator();
+        while(gbyKeyListItr.hasNext()){
+          ExprNodeDesc expr = gbyKeyListItr.next().clone();
+          if(expr instanceof ExprNodeColumnDesc){
+            ExprNodeColumnDesc colExpr = (ExprNodeColumnDesc)expr;
+            gbyKeys = colExpr.getColumn();
+            if(gbyKeyListItr.hasNext()){
+              gbyKeys = gbyKeys + ",";
+            }
+          }
+        }
+
+
+          //the query contains the sum aggregation GenericUDAF
+        String selReplacementCommand = "select sum(`"
+          + rewriteQueryCtx.getAggregateFunction() + "`)"
+          + " from " + rewriteQueryCtx.getIndexName()
+          + " group by " + gbyKeys + " ";
+        //create a new ParseContext for the query to retrieve its operator tree,
+        //and the required GroupByOperator from it
+        ParseContext newDAGContext = RewriteParseContextGenerator.generateOperatorTree(
+            rewriteQueryCtx.getParseContext().getConf(),
+            selReplacementCommand);
+
+        //we get our new GroupByOperator here
+        Map<GroupByOperator, Set<String>> newGbyOpMap = newDAGContext.getGroupOpToInputTables();
+        GroupByOperator newGbyOperator = newGbyOpMap.keySet().iterator().next();
+        GroupByDesc oldConf = operator.getConf();
+
+        //we need this information to set the correct colList, outputColumnNames in SelectOperator
+        ExprNodeColumnDesc aggrExprNode = null;
+
+        //Construct the new AggregationDesc to get rid of the current
+        //internal names and replace them with new internal names
+        //as required by the operator tree
+        GroupByDesc newConf = newGbyOperator.getConf();
+        List<AggregationDesc> newAggrList = newConf.getAggregators();
+        if(newAggrList != null && newAggrList.size() > 0){
+          for (AggregationDesc aggregationDesc : newAggrList) {
+            rewriteQueryCtx.setEval(aggregationDesc.getGenericUDAFEvaluator());
+            aggrExprNode = (ExprNodeColumnDesc)aggregationDesc.getParameters().get(0);
+            rewriteQueryCtx.setAggrExprNode(aggrExprNode);
+          }
+        }
+
+        //Now the GroupByOperator has the new AggregationList; sum(`_count_of_indexed_key`)
+        //instead of count(indexed_key)
+        OpParseContext gbyOPC = rewriteQueryCtx.getOpc().get(operator);
+        RowResolver gbyRR = newDAGContext.getOpParseCtx().get(newGbyOperator).getRowResolver();
+        gbyOPC.setRowResolver(gbyRR);
+        rewriteQueryCtx.getOpc().put(operator, gbyOPC);
+
+        oldConf.setAggregators((ArrayList<AggregationDesc>) newAggrList);
+        operator.setConf(oldConf);
+
+
+      }else{
+        //we just need to reset the GenericUDAFEvaluator and its name for this
+        //GroupByOperator whose parent is the ReduceSinkOperator
+        GroupByDesc childConf = (GroupByDesc) operator.getConf();
+        List<AggregationDesc> childAggrList = childConf.getAggregators();
+        if(childAggrList != null && childAggrList.size() > 0){
+          for (AggregationDesc aggregationDesc : childAggrList) {
+            List<ExprNodeDesc> paraList = aggregationDesc.getParameters();
+            List<TypeInfo> parametersTypeInfoList = new ArrayList<TypeInfo>();
+            for (ExprNodeDesc expr : paraList) {
+              parametersTypeInfoList.add(expr.getTypeInfo());
+            }
+            GenericUDAFEvaluator evaluator = FunctionRegistry.getGenericUDAFEvaluator(
+                "sum", parametersTypeInfoList, false, false);
+            aggregationDesc.setGenericUDAFEvaluator(evaluator);
+            aggregationDesc.setGenericUDAFName("sum");
+          }
+        }
+
+      }
+
+      return null;
+    }
+  }
+
+  public static NewQueryGroupbySchemaProc getNewQueryGroupbySchemaProc(){
+    return new NewQueryGroupbySchemaProc();
+  }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java?rev=1170007&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java
(added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java
Tue Sep 13 02:20:52 2011
@@ -0,0 +1,168 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.index;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.parse.OpParseContext;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+
+/**
+ * RewriteQueryUsingAggregateIndexCtx class stores the
+ * context for the {@link RewriteQueryUsingAggregateIndex}
+ * used to rewrite operator plan with index table instead of base table.
+ */
+
+public final class RewriteQueryUsingAggregateIndexCtx  implements NodeProcessorCtx {
+
+  private RewriteQueryUsingAggregateIndexCtx(ParseContext parseContext, Hive hiveDb,
+      String indexTableName, String baseTableName, String aggregateFunction){
+    this.parseContext = parseContext;
+    this.hiveDb = hiveDb;
+    this.indexTableName = indexTableName;
+    this.baseTableName = baseTableName;
+    this.aggregateFunction = aggregateFunction;
+    this.opc = parseContext.getOpParseCtx();
+  }
+
+  public static RewriteQueryUsingAggregateIndexCtx getInstance(ParseContext parseContext,
+      Hive hiveDb, String indexTableName, String baseTableName, String aggregateFunction){
+    return new RewriteQueryUsingAggregateIndexCtx(
+        parseContext, hiveDb, indexTableName, baseTableName, aggregateFunction);
+  }
+
+
+  private Map<Operator<? extends Serializable>, OpParseContext> opc =
+    new LinkedHashMap<Operator<? extends Serializable>, OpParseContext>();
+  private final Hive hiveDb;
+  private final ParseContext parseContext;
+  //We need the GenericUDAFEvaluator for GenericUDAF function "sum"
+  private GenericUDAFEvaluator eval = null;
+  private final String indexTableName;
+  private final String baseTableName;
+  private final String aggregateFunction;
+  private ExprNodeColumnDesc aggrExprNode = null;
+
+  public Map<Operator<? extends Serializable>, OpParseContext> getOpc() {
+    return opc;
+  }
+
+  public  ParseContext getParseContext() {
+    return parseContext;
+  }
+
+  public Hive getHiveDb() {
+    return hiveDb;
+  }
+
+  public String getIndexName() {
+     return indexTableName;
+  }
+
+  public GenericUDAFEvaluator getEval() {
+    return eval;
+  }
+
+  public void setEval(GenericUDAFEvaluator eval) {
+    this.eval = eval;
+  }
+
+  public void setAggrExprNode(ExprNodeColumnDesc aggrExprNode) {
+    this.aggrExprNode = aggrExprNode;
+  }
+
+  public ExprNodeColumnDesc getAggrExprNode() {
+    return aggrExprNode;
+  }
+
+ /**
+  * Walk the original operator tree using the {@link DefaultGraphWalker} using the rules.
+  * Each of the rules invoke respective methods from the {@link RewriteQueryUsingAggregateIndex}
+  * to rewrite the original query using aggregate index.
+  *
+  * @param topOp
+  * @throws SemanticException
+  */
+  public void invokeRewriteQueryProc(
+      Operator<? extends Serializable> topOp) throws SemanticException{
+    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+
+    // replace scan operator containing original table with index table
+    opRules.put(new RuleRegExp("R1", "TS%"),
+        RewriteQueryUsingAggregateIndex.getReplaceTableScanProc());
+    //rule that replaces index key selection with
+    //sum(`_count_of_indexed_column`) function in original query
+    opRules.put(new RuleRegExp("R2", "SEL%"),
+        RewriteQueryUsingAggregateIndex.getNewQuerySelectSchemaProc());
+    //Manipulates the ExprNodeDesc from GroupByOperator aggregation list
+    opRules.put(new RuleRegExp("R3", "GBY%"),
+        RewriteQueryUsingAggregateIndex.getNewQueryGroupbySchemaProc());
+
+    // The dispatcher fires the processor corresponding to the closest matching
+    // rule and passes the context along
+    Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, this);
+    GraphWalker ogw = new DefaultGraphWalker(disp);
+
+    // Create a list of topop nodes
+    List<Node> topNodes = new ArrayList<Node>();
+    topNodes.add(topOp);
+    ogw.startWalking(topNodes, null);
+  }
+
+ /**
+  * Default procedure for {@link DefaultRuleDispatcher}.
+  * @return
+  */
+  private NodeProcessor getDefaultProc() {
+    return new NodeProcessor() {
+      @Override
+      public Object process(Node nd, Stack<Node> stack,
+          NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
+        return null;
+      }
+    };
+  }
+
+  public String getBaseTableName() {
+    return baseTableName;
+  }
+
+  public String getAggregateFunction() {
+    return aggregateFunction;
+  }
+}

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java?rev=1170007&r1=1170006&r2=1170007&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java
Tue Sep 13 02:20:52 2011
@@ -20,7 +20,6 @@ package org.apache.hadoop.hive.ql.optimi
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Collection;
 import java.util.HashMap;
 import java.util.LinkedHashSet;
 import java.util.List;
@@ -31,11 +30,7 @@ import java.util.Stack;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.ContentSummary;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.hive.metastore.api.Index;
-import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.MapRedTask;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
@@ -46,19 +41,17 @@ import org.apache.hadoop.hive.ql.index.H
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
-import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.HiveUtils;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.IndexUtils;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
-import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.FilterDesc;
+import org.apache.hadoop.hive.ql.plan.MapredWork;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
-import org.apache.hadoop.hive.ql.plan.MapredWork;
 
 /**
 *
@@ -71,12 +64,10 @@ public class IndexWhereProcessor impleme
 
   private static final Log LOG = LogFactory.getLog(IndexWhereProcessor.class.getName());
   private final Map<Table, List<Index>> indexes;
-  private Map<Index, Table> indexToIndexTable;
 
   public IndexWhereProcessor(Map<Table, List<Index>> indexes) {
     super();
     this.indexes = indexes;
-    this.indexToIndexTable = new HashMap<Index, Table>();
   }
 
   @Override
@@ -107,7 +98,7 @@ public class IndexWhereProcessor impleme
     // check if we have indexes on all partitions in this table scan
     Set<Partition> queryPartitions;
     try {
-      queryPartitions = checkPartitionsCoveredByIndex(operator, pctx);
+      queryPartitions = IndexUtils.checkPartitionsCoveredByIndex(operator, pctx, indexes);
       if (queryPartitions == null) { // partitions not covered
         return null;
       }
@@ -230,157 +221,6 @@ public class IndexWhereProcessor impleme
     return;
   }
 
-  /**
-   * Check the partitions used by the table scan to make sure they also exist in the
-   * index table
-   * @param pctx
-   * @param operator
-   * @return partitions used by query.  null if they do not exist in index table
-   */
-  private Set<Partition> checkPartitionsCoveredByIndex(TableScanOperator tableScan,
ParseContext pctx)
-    throws HiveException {
-    Hive hive = Hive.get(pctx.getConf());
-
-
-    // make sure each partition exists on the index table
-    PrunedPartitionList queryPartitionList = pctx.getOpToPartList().get(tableScan);
-    Set<Partition> queryPartitions = queryPartitionList.getConfirmedPartns();
-
-    for (Partition part : queryPartitions) {
-      List<Table> sourceIndexTables = getIndexTables(hive, part);
-      if (!containsPartition(hive, part)) {
-        return null; // problem if it doesn't contain the partition
-      }
-    }
-
-    return queryPartitions;
-  }
-
-  /**
-   * return index tables associated with a given base table
-   */
-  private List<Table> getIndexTables(Hive hive, Table table) throws
-    HiveException {
-    List<Table> indexTables = new ArrayList<Table>();
-    if (indexes == null || indexes.get(table) == null) {
-      return indexTables;
-    }
-    for (Index index : indexes.get(table)) {
-      Table indexTable = hive.getTable(index.getIndexTableName());
-      indexToIndexTable.put(index, indexTable);
-      indexTables.add(indexTable);
-    }
-    return indexTables;
-  }
-
-  /**
-   * return index tables associated with the base table of the partition
-   */
-  private List<Table> getIndexTables(Hive hive, Partition part) throws HiveException
{
-    List<Table> indexTables = new ArrayList<Table>();
-    Table partitionedTable = part.getTable();
-    if (indexes == null || indexes.get(partitionedTable) == null) {
-      return indexTables;
-    }
-    for (Index index : indexes.get(partitionedTable)) {
-      Table indexTable = hive.getTable(index.getIndexTableName());
-      indexToIndexTable.put(index, indexTable);
-      indexTables.add(indexTable);
-    }
-    return indexTables;
-  }
-
-  /**
-   * check that every index table contains the given partition and is fresh
-   */
-  private boolean containsPartition(Hive hive, Partition part)
-    throws HiveException {
-    HashMap<String, String> partSpec = part.getSpec();
-
-    if (indexes == null || indexes.get(part.getTable()) == null) {
-      return false;
-    }
-
-    if (partSpec.isEmpty()) {
-      // empty specs come from non-partitioned tables
-      return isIndexTableFresh(hive, indexes.get(part.getTable()), part.getTable());
-    }
-
-    for (Index index : indexes.get(part.getTable())) {
-      Table indexTable = indexToIndexTable.get(index);
-      // get partitions that match the spec
-      List<Partition> matchingPartitions = hive.getPartitions(indexTable, partSpec);
-      if (matchingPartitions == null || matchingPartitions.size() == 0) {
-        LOG.info("Index table " + indexTable + "did not contain built partition that matched
" + partSpec);
-        return false;
-      } else if (!isIndexPartitionFresh(hive, index, part)) {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  /**
-   * Check the index partitions on a parttioned table exist and are fresh
-   */
-  private boolean isIndexPartitionFresh(Hive hive, Index index,
-      Partition part) throws HiveException {
-    LOG.info("checking index staleness...");
-    try {
-      FileSystem partFs = part.getPartitionPath().getFileSystem(hive.getConf());
-      FileStatus partFss = partFs.getFileStatus(part.getPartitionPath());
-      String ts = index.getParameters().get(part.getSpec().toString());
-      if (ts == null) {
-        return false;
-      }
-      long indexTs = Long.parseLong(ts);
-      LOG.info(partFss.getModificationTime());
-      LOG.info(ts);
-      if (partFss.getModificationTime() > indexTs) {
-        LOG.info("index is stale on the partitions that matched " + part.getSpec());
-        return false;
-      }
-    } catch (IOException e) {
-      LOG.info("failed to grab timestamp info");
-      throw new HiveException(e);
-    }
-    return true;
-  }
-
-  /**
-   * Check that the indexes on the unpartioned table exist and are fresh
-   */
-  private boolean isIndexTableFresh(Hive hive, List<Index> indexes, Table src)
-    throws HiveException {
-    //check that they exist
-    if (indexes == null || indexes.size() == 0) {
-      return false;
-    }
-    //check that they are not stale
-    for (Index index : indexes) {
-      LOG.info("checking index staleness...");
-      try {
-        FileSystem srcFs = src.getPath().getFileSystem(hive.getConf());
-        FileStatus srcFss= srcFs.getFileStatus(src.getPath());
-        String ts = index.getParameters().get("base_timestamp");
-        if (ts == null) {
-          return false;
-        }
-        long indexTs = Long.parseLong(ts);
-        LOG.info(srcFss.getModificationTime());
-        LOG.info(ts);
-        if (srcFss.getModificationTime() > indexTs) {
-          LOG.info("index is stale ");
-          return false;
-        }
-      } catch (IOException e) {
-        LOG.info("failed to grab timestamp info");
-        throw new HiveException(e);
-      }
-    }
-    return true;
-  }
-
 
   /**
    * Insert the rewrite tasks at the head of the pctx task tree

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java?rev=1170007&r1=1170006&r2=1170007&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java
Tue Sep 13 02:20:52 2011
@@ -29,8 +29,8 @@ import java.util.Stack;
 
 import org.apache.hadoop.hive.metastore.api.Index;
 import org.apache.hadoop.hive.ql.exec.Task;
-import org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler;
 import org.apache.hadoop.hive.ql.index.bitmap.BitmapIndexHandler;
+import org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -40,8 +40,8 @@ import org.apache.hadoop.hive.ql.lib.Nod
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.IndexUtils;
 import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
@@ -118,7 +118,7 @@ public class IndexWhereTaskDispatcher im
     Map<Table, List<Index>> indexes = new HashMap<Table, List<Index>>();
     for (Table tbl : topTables)
     {
-      List<Index> tblIndexes = getIndexes(tbl, supportedIndexes);
+      List<Index> tblIndexes = IndexUtils.getIndexes(tbl, supportedIndexes);
       if (tblIndexes.size() > 0) {
         indexes.put(tbl, tblIndexes);
       }
@@ -136,29 +136,6 @@ public class IndexWhereTaskDispatcher im
     return operatorRules;
   }
 
-  /**
-   * Get a list of indexes on a table that match given types.
-   * Copied from HIVE-1694 patch
-   */
-  private List<Index> getIndexes(Table baseTableMetaData, List<String> matchIndexTypes)
-    throws SemanticException {
-    List<Index> matchingIndexes = new ArrayList<Index>();
-    List<Index> indexesOnTable = null;
-
-    try {
-      indexesOnTable = baseTableMetaData.getAllIndexes((short) -1); // get all indexes
-    } catch (HiveException e) {
-      throw new SemanticException("Error accessing metastore", e);
-    }
-
-    for (Index index : indexesOnTable) {
-      String indexType = index.getIndexHandlerClass();
-      if (matchIndexTypes.contains(indexType)) {
-        matchingIndexes.add(index);
-      }
-    }
-    return matchingIndexes;
-  }
 
   private NodeProcessor getDefaultProcessor() {
     return new NodeProcessor() {

Added: hive/trunk/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx.q?rev=1170007&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx.q Tue Sep 13 02:20:52
2011
@@ -0,0 +1,170 @@
+
+DROP TABLE lineitem;
+CREATE TABLE lineitem (L_ORDERKEY      INT,
+                                L_PARTKEY       INT,
+                                L_SUPPKEY       INT,
+                                L_LINENUMBER    INT,
+                                L_QUANTITY      DOUBLE,
+                                L_EXTENDEDPRICE DOUBLE,
+                                L_DISCOUNT      DOUBLE,
+                                L_TAX           DOUBLE,
+                                L_RETURNFLAG    STRING,
+                                L_LINESTATUS    STRING,
+                                l_shipdate      STRING,
+                                L_COMMITDATE    STRING,
+                                L_RECEIPTDATE   STRING,
+                                L_SHIPINSTRUCT  STRING,
+                                L_SHIPMODE      STRING,
+                                L_COMMENT       STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|';
+
+LOAD DATA LOCAL INPATH '../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem;
+
+CREATE INDEX lineitem_lshipdate_idx ON TABLE lineitem(l_shipdate) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler'
WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(l_shipdate)");
+ALTER INDEX lineitem_lshipdate_idx ON lineitem REBUILD;
+
+explain select l_shipdate, count(l_shipdate)
+from lineitem
+group by l_shipdate;
+
+select l_shipdate, count(l_shipdate)
+from lineitem
+group by l_shipdate
+order by l_shipdate;
+
+set hive.optimize.index.groupby=true;
+
+explain select l_shipdate, count(l_shipdate)
+from lineitem
+group by l_shipdate;
+
+select l_shipdate, count(l_shipdate)
+from lineitem
+group by l_shipdate
+order by l_shipdate;
+
+set hive.optimize.index.groupby=false;
+
+
+explain select year(l_shipdate) as year,
+        month(l_shipdate) as month,
+        count(l_shipdate) as monthly_shipments
+from lineitem
+group by year(l_shipdate), month(l_shipdate) 
+order by year, month;
+
+select year(l_shipdate) as year,
+        month(l_shipdate) as month,
+        count(l_shipdate) as monthly_shipments
+from lineitem
+group by year(l_shipdate), month(l_shipdate) 
+order by year, month;
+
+set hive.optimize.index.groupby=true;
+
+explain select year(l_shipdate) as year,
+        month(l_shipdate) as month,
+        count(l_shipdate) as monthly_shipments
+from lineitem
+group by year(l_shipdate), month(l_shipdate) 
+order by year, month;
+
+select year(l_shipdate) as year,
+        month(l_shipdate) as month,
+        count(l_shipdate) as monthly_shipments
+from lineitem
+group by year(l_shipdate), month(l_shipdate) 
+order by year, month;
+
+explain select lastyear.month,
+        thisyear.month,
+        (thisyear.monthly_shipments - lastyear.monthly_shipments) /
+lastyear.monthly_shipments as monthly_shipments_delta
+   from (select year(l_shipdate) as year,
+                month(l_shipdate) as month,
+                count(l_shipdate) as monthly_shipments
+           from lineitem
+          where year(l_shipdate) = 1997
+          group by year(l_shipdate), month(l_shipdate)
+        )  lastyear join
+        (select year(l_shipdate) as year,
+                month(l_shipdate) as month,
+                count(l_shipdate) as monthly_shipments
+           from lineitem
+          where year(l_shipdate) = 1998
+          group by year(l_shipdate), month(l_shipdate)
+        )  thisyear
+  on lastyear.month = thisyear.month;
+
+explain  select l_shipdate, cnt
+from (select l_shipdate, count(l_shipdate) as cnt from lineitem group by l_shipdate
+union all
+select l_shipdate, l_orderkey as cnt
+from lineitem) dummy;
+
+CREATE TABLE tbl(key int, value int);
+CREATE INDEX tbl_key_idx ON TABLE tbl(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler'
WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)");
+ALTER INDEX tbl_key_idx ON tbl REBUILD;
+
+EXPLAIN select key, count(key) from tbl where key = 1 group by key;
+EXPLAIN select key, count(key) from tbl group by key;
+
+EXPLAIN select count(1) from tbl;
+EXPLAIN select count(key) from tbl;
+
+EXPLAIN select key FROM tbl GROUP BY key;
+EXPLAIN select key FROM tbl GROUP BY value, key;
+EXPLAIN select key FROM tbl WHERE key = 3 GROUP BY key;
+EXPLAIN select key FROM tbl WHERE value = 2 GROUP BY key;
+EXPLAIN select key FROM tbl GROUP BY key, substr(key,2,3);
+
+EXPLAIN select key, value FROM tbl GROUP BY value, key;
+EXPLAIN select key, value FROM tbl WHERE value = 1 GROUP BY key, value;
+
+EXPLAIN select DISTINCT key FROM tbl;
+EXPLAIN select DISTINCT key FROM tbl;
+EXPLAIN select DISTINCT key FROM tbl;
+EXPLAIN select DISTINCT key, value FROM tbl;
+EXPLAIN select DISTINCT key, value FROM tbl WHERE value = 2;
+EXPLAIN select DISTINCT key, value FROM tbl WHERE value = 2 AND key = 3;
+EXPLAIN select DISTINCT key, value FROM tbl WHERE value = key;
+EXPLAIN select DISTINCT key, substr(value,2,3) FROM tbl WHERE value = key;
+EXPLAIN select DISTINCT key, substr(value,2,3) FROM tbl;
+
+EXPLAIN select * FROM (select DISTINCT key, value FROM tbl) v1 WHERE v1.value = 2;
+
+DROP TABLE tbl;
+
+CREATE TABLE tblpart (key int, value string) PARTITIONED BY (ds string, hr int);
+INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-08', hr=11) SELECT key, value FROM
srcpart WHERE ds = '2008-04-08' AND hr = 11;
+INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-08', hr=12) SELECT key, value FROM
srcpart WHERE ds = '2008-04-08' AND hr = 12;
+INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-09', hr=11) SELECT key, value FROM
srcpart WHERE ds = '2008-04-09' AND hr = 11;
+INSERT OVERWRITE TABLE tblpart PARTITION (ds='2008-04-09', hr=12) SELECT key, value FROM
srcpart WHERE ds = '2008-04-09' AND hr = 12;
+
+CREATE INDEX tbl_part_index ON TABLE tblpart(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler'
WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)");
+
+ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-08', hr=11) REBUILD;
+EXPLAIN SELECT key, count(key) FROM tblpart WHERE ds='2008-04-09' AND hr=12 AND key <
10 GROUP BY key;
+
+ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-08', hr=12) REBUILD;
+ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-09', hr=11) REBUILD;
+ALTER INDEX tbl_part_index ON tblpart PARTITION (ds='2008-04-09', hr=12) REBUILD;
+EXPLAIN SELECT key, count(key) FROM tblpart WHERE ds='2008-04-09' AND hr=12 AND key <
10 GROUP BY key;
+
+DROP INDEX tbl_part_index on tblpart;
+DROP TABLE tblpart;
+
+CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'; 
+LOAD DATA LOCAL INPATH '../data/files/tbl.txt' OVERWRITE INTO TABLE tbl;
+
+CREATE INDEX tbl_key_idx ON TABLE tbl(key) AS 'org.apache.hadoop.hive.ql.index.AggregateIndexHandler'
WITH DEFERRED REBUILD IDXPROPERTIES("AGGREGATES"="count(key)");
+ALTER INDEX tbl_key_idx ON tbl REBUILD;
+
+set hive.optimize.index.groupby=false;
+explain select key, count(key) from tbl group by key order by key;
+select key, count(key) from tbl group by key order by key;
+set hive.optimize.index.groupby=true;
+explain select key, count(key) from tbl group by key order by key;
+select key, count(key) from tbl group by key order by key;
+DROP TABLE tbl;
\ No newline at end of file



Mime
View raw message