hive-commits mailing list archives

From j..@apache.org
Subject svn commit: r1098742 [1/2] - in /hive/trunk: common/src/java/org/apache/hadoop/hive/conf/ conf/ eclipse-templates/ ql/src/java/org/apache/hadoop/hive/ql/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/index/ ql/src/ja...
Date Mon, 02 May 2011 19:10:44 GMT
Author: jvs
Date: Mon May  2 19:10:42 2011
New Revision: 1098742

URL: http://svn.apache.org/viewvc?rev=1098742&view=rev
Log:
HIVE-1644. Use filter pushdown for automatically accessing indexes.
(Russell Melick and Jeffrey Lym via jvs)


Added:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexQueryContext.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/IndexWhereResolver.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcCtx.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java
    hive/trunk/ql/src/test/queries/clientpositive/index_auto.q
    hive/trunk/ql/src/test/queries/clientpositive/index_auto_file_format.q
    hive/trunk/ql/src/test/queries/clientpositive/index_auto_multiple.q
    hive/trunk/ql/src/test/queries/clientpositive/index_auto_partitioned.q
    hive/trunk/ql/src/test/queries/clientpositive/index_auto_unused.q
    hive/trunk/ql/src/test/results/clientpositive/index_auto.q.out
    hive/trunk/ql/src/test/results/clientpositive/index_auto_file_format.q.out
    hive/trunk/ql/src/test/results/clientpositive/index_auto_multiple.q.out
    hive/trunk/ql/src/test/results/clientpositive/index_auto_partitioned.q.out
    hive/trunk/ql/src/test/results/clientpositive/index_auto_unused.q.out
Modified:
    hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hive/trunk/conf/hive-default.xml
    hive/trunk/eclipse-templates/.classpath
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/AbstractIndexHandler.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexHandler.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java

Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1098742&r1=1098741&r2=1098742&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Mon May  2 19:10:42 2011
@@ -358,6 +358,7 @@ public class HiveConf extends Configurat
 
     // Optimizer
     HIVEOPTCP("hive.optimize.cp", true), // column pruner
+    HIVEOPTINDEXFILTER("hive.optimize.index.filter", false), // automatically use indexes
     HIVEOPTPPD("hive.optimize.ppd", true), // predicate pushdown
     // push predicates down to storage handlers
     HIVEOPTPPD_STORAGE("hive.optimize.ppd.storage", true),
@@ -366,6 +367,10 @@ public class HiveConf extends Configurat
     HIVEOPTSORTMERGEBUCKETMAPJOIN("hive.optimize.bucketmapjoin.sortedmerge", false), // try to use sorted merge bucket map join
     HIVEOPTREDUCEDEDUPLICATION("hive.optimize.reducededuplication", true),
 
+    // Indexes
+    HIVEOPTINDEXFILTER_COMPACT_MINSIZE("hive.optimize.index.filter.compact.minsize", (long) 5 * 1024 * 1024 * 1024), // 5G
+    HIVEOPTINDEXFILTER_COMPACT_MAXSIZE("hive.optimize.index.filter.compact.maxsize", (long) -1), // infinity
+
     // Statistics
     HIVESTATSAUTOGATHER("hive.stats.autogather", true),
     HIVESTATSDBCLASS("hive.stats.dbclass",

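For illustration only (not part of this change): a minimal sketch of reading the new settings through HiveConf, mirroring the getBoolVar/getLongVar calls that PhysicalOptimizer and CompactIndexHandler make further down in this diff. It assumes a default-constructed HiveConf.

    import org.apache.hadoop.hive.conf.HiveConf;

    public class IndexConfExample {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // new ConfVars added above, with their committed defaults
        boolean autoIndex = conf.getBoolVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER);            // false
        long minSize = conf.getLongVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER_COMPACT_MINSIZE); // 5G
        long maxSize = conf.getLongVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER_COMPACT_MAXSIZE); // -1 (no limit)
        System.out.println(autoIndex + " " + minSize + " " + maxSize);
      }
    }
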
Modified: hive/trunk/conf/hive-default.xml
URL: http://svn.apache.org/viewvc/hive/trunk/conf/hive-default.xml?rev=1098742&r1=1098741&r2=1098742&view=diff
==============================================================================
--- hive/trunk/conf/hive-default.xml (original)
+++ hive/trunk/conf/hive-default.xml Mon May  2 19:10:42 2011
@@ -325,6 +325,12 @@
 </property>
 
 <property>
+  <name>hive.optimize.index.filter</name>
+  <value>false</value>
+  <description>Whether to enable automatic use of indexes</description>
+</property>
+
+<property>
   <name>hive.optimize.ppd</name>
   <value>true</value>
   <description>Whether to enable predicate pushdown</description>
@@ -1011,6 +1017,19 @@
 </property>
 
 <property>
+  <name>hive.optimize.index.filter.compact.minsize</name>
+  <value>5368709120</value>
+  <description>Minimum size (in bytes) of the inputs on which a compact index is automatically used.</description>
+</property>
+
+<property>
+  <name>hive.optimize.index.filter.compact.maxsize</name>
+  <value>-1</value>
+  <description>Maximum size (in bytes) of the inputs on which a compact index is automatically used.
+  A negative number is equivalent to infinity.</description>
+</property>
+
+<property>
   <name>hive.exim.uri.scheme.whitelist</name>
   <value>hdfs,pfile</value>
   <description>A comma separated list of acceptable URI schemes for import and export.</description>

Modified: hive/trunk/eclipse-templates/.classpath
URL: http://svn.apache.org/viewvc/hive/trunk/eclipse-templates/.classpath?rev=1098742&r1=1098741&r2=1098742&view=diff
==============================================================================
--- hive/trunk/eclipse-templates/.classpath (original)
+++ hive/trunk/eclipse-templates/.classpath Mon May  2 19:10:42 2011
@@ -35,6 +35,7 @@
   <classpathentry kind="lib" path="build/dist/lib/commons-pool-@commons-pool.version@.jar"/>
   <classpathentry kind="lib" path="build/dist/lib/slf4j-api-@slf4j-api.version@.jar"/>
   <classpathentry kind="lib" path="build/dist/lib/slf4j-log4j12-@slf4j-log4j12.version@.jar"/>
+  <classpathentry kind="lib" path="build/dist/lib/javaewah-0.2.jar"/>
   <classpathentry kind="src" path="build/contrib/test/src"/>
   <classpathentry kind="src" path="build/metastore/gen/antlr/gen-java"/>
   <classpathentry kind="src" path="build/ql/test/src"/>

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java?rev=1098742&r1=1098741&r2=1098742&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java Mon May  2 19:10:42 2011
@@ -317,6 +317,17 @@ public class Driver implements CommandPr
    *          The SQL query to compile.
    */
   public int compile(String command) {
+    return compile(command, true);
+  }
+
+  /**
+   * Compile a new query, optionally resetting the taskID counter.  Not resetting the task
+   * counter is useful for generating re-entrant QL queries.
+   * @param command  The HiveQL query to compile
+   * @param resetTaskIds Resets taskID counter if true.  
+   * @return
+   */
+  public int compile(String command, boolean resetTaskIds) {
 
     Utilities.PerfLogBegin(LOG, "compile");
 
@@ -325,7 +336,9 @@ public class Driver implements CommandPr
       plan = null;
     }
 
+    if (resetTaskIds) {
     TaskFactory.resetId();
+    }
 
     try {
       command = new VariableSubstitution().substitute(conf,command);

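For illustration only (not part of this change): a sketch of the new two-argument compile, used the same way CompactIndexHandler uses it below to build a re-entrant index query. The query text and index table name are borrowed from the new index_auto.q test and are hypothetical here.

    import java.io.Serializable;
    import java.util.List;

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.ql.Driver;
    import org.apache.hadoop.hive.ql.exec.Task;

    public class ReentrantCompileExample {
      public static List<Task<? extends Serializable>> compileIndexQuery(HiveConf conf) {
        Driver driver = new Driver(conf);
        // false: keep the current taskID counter so the generated tasks can be
        // merged into the plan that is already being compiled
        driver.compile("SELECT `_bucketname`, `_offsets` FROM default__src_src_index__ WHERE key = 86", false);
        return driver.getPlan().getRootTasks();
      }
    }
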
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java?rev=1098742&r1=1098741&r2=1098742&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java Mon May  2 19:10:42 2011
@@ -277,9 +277,8 @@ public class ExecDriver extends Task<Map
     job.setNumReduceTasks(work.getNumReduceTasks().intValue());
     job.setReducerClass(ExecReducer.class);
 
-    if (work.getInputformat() != null) {
-      HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, work.getInputformat());
-    }
+    // set input format information if necessary
+    setInputAttributes(job);
 
     // Turn on speculative execution for reducers
     boolean useSpeculativeExecReducers = HiveConf.getBoolVar(job,
@@ -471,6 +470,18 @@ public class ExecDriver extends Task<Map
     return (returnVal);
   }
 
+  /**
+   * Set hive input format, and input format file if necessary.
+   */
+  protected void setInputAttributes(Configuration conf) {
+    if (work.getInputformat() != null) {
+      HiveConf.setVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT, work.getInputformat());
+    }
+    if (work.getIndexIntermediateFile() != null) {
+      conf.set("hive.index.compact.file", work.getIndexIntermediateFile());
+    }
+  }
+
   public boolean mapStarted() {
     return this.jobExecHelper.mapStarted();
   }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java?rev=1098742&r1=1098741&r2=1098742&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java Mon May  2 19:10:42 2011
@@ -126,6 +126,9 @@ public class MapRedTask extends ExecDriv
       // we need to edit the configuration to setup cmdline. clone it first
       cloneConf();
 
+      // propagate input format if necessary
+      super.setInputAttributes(conf);
+
       // enable assertion
       String hadoopExec = conf.getVar(HiveConf.ConfVars.HADOOPBIN);
       String hiveJar = conf.getJar();

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/AbstractIndexHandler.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/AbstractIndexHandler.java?rev=1098742&r1=1098741&r2=1098742&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/AbstractIndexHandler.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/AbstractIndexHandler.java Mon May  2 19:10:42 2011
@@ -20,8 +20,12 @@ package org.apache.hadoop.hive.ql.index;
 
 import java.util.List;
 
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Index;
 import org.apache.hadoop.hive.ql.metadata.HiveUtils;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 
 /**
  * Abstract base class for index handlers.  This is provided as insulation
@@ -42,4 +46,13 @@ public abstract class AbstractIndexHandl
     return sb.toString();
   }
 
+  public void generateIndexQuery(Index index, ExprNodeDesc predicate,
+    ParseContext pctx, HiveIndexQueryContext queryContext) {
+    queryContext.setQueryTasks(null);
+    return;
+  }
+
+  public boolean checkQuerySize(long inputSize, HiveConf conf) {
+    return false;
+  }
 }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexHandler.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexHandler.java?rev=1098742&r1=1098741&r2=1098742&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexHandler.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexHandler.java Mon May  2 19:10:42 2011
@@ -22,12 +22,15 @@ import java.util.List;
 import java.util.Set;
 
 import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.Index;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
-import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 
 /**
  * HiveIndexHandler defines a pluggable interface for adding new index handlers
@@ -114,4 +117,23 @@ public interface HiveIndexHandler extend
       Set<ReadEntity> inputs, Set<WriteEntity> outputs)
       throws HiveException;
 
+  /**
+   * Generate the list of tasks required to run an index sub-query for the
+   * given predicate, using the given index
+   * @param index
+   * @param predicate
+   * @param pctx
+   * @param queryContext contains results, such as query tasks and input configuration
+   */
+  void generateIndexQuery(Index index, ExprNodeDesc predicate,
+    ParseContext pctx, HiveIndexQueryContext queryContext);
+
+  /**
+   * Check the size of an input query to make sure it fits within the bounds
+   *
+   * @param inputSize size (in bytes) of the query in question
+   * @param conf
+   * @return true if query is within the bounds
+   */
+  boolean checkQuerySize(long inputSize, HiveConf conf);
 }
\ No newline at end of file

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexQueryContext.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexQueryContext.java?rev=1098742&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexQueryContext.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexQueryContext.java Mon May  2 19:10:42 2011
@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.index;
+
+import java.io.Serializable;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+
+/**
+ * Used to pass information between the IndexProcessor and the plugin
+ * IndexHandler during query processing
+ *
+ */
+public class HiveIndexQueryContext {
+
+  private HashSet<ReadEntity> additionalSemanticInputs; // additional inputs to add to the parse context when
+                                                        // merging the index query tasks
+  private String indexInputFormat;        // input format to set on the TableScanOperator to activate indexing
+  private String indexIntermediateFile;   // name of intermediate file written by the index query for the
+                                          // TableScanOperator to use
+  private List<Task<? extends Serializable>> queryTasks;      // list of tasks that will execute the index query and write
+                                                              // results to a temporary file
+  private ExprNodeDesc residualPredicate; // predicate that could not be processed by an index handler
+                                          // and should be used on the base table scan (see HIVE-2115)
+  private Set<Partition> queryPartitions; // partitions accessed by the original query
+
+  public HiveIndexQueryContext() {
+    this.additionalSemanticInputs = null;
+    this.indexInputFormat = null;
+    this.indexIntermediateFile = null;
+    this.queryTasks = null;
+  }
+
+  public HashSet<ReadEntity> getAdditionalSemanticInputs() {
+    return additionalSemanticInputs;
+  }
+  public void addAdditionalSemanticInputs(HashSet<ReadEntity> additionalParseInputs) {
+    if (this.additionalSemanticInputs == null) {
+      this.additionalSemanticInputs = new HashSet<ReadEntity>();
+    }
+    this.additionalSemanticInputs.addAll(additionalParseInputs);
+  }
+
+  public String getIndexInputFormat() {
+    return indexInputFormat;
+  }
+  public void setIndexInputFormat(String indexInputFormat) {
+    this.indexInputFormat = indexInputFormat;
+  }
+
+  public String getIndexIntermediateFile() {
+    return indexIntermediateFile;
+  }
+  public void setIndexIntermediateFile(String indexIntermediateFile) {
+    this.indexIntermediateFile = indexIntermediateFile;
+  }
+
+  public List<Task<? extends Serializable>> getQueryTasks() {
+    return queryTasks;
+  }
+  public void setQueryTasks(List<Task<? extends Serializable>> indexQueryTasks) {
+    this.queryTasks = indexQueryTasks;
+  }
+
+  public void setResidualPredicate(ExprNodeDesc residualPredicate) {
+    this.residualPredicate = residualPredicate;
+  }
+
+  public ExprNodeDesc getResidualPredicate() {
+    return residualPredicate;
+  }
+
+  public Set<Partition> getQueryPartitions() {
+    return queryPartitions;
+  }
+
+  public void setQueryPartitions(Set<Partition> queryPartitions) {
+    this.queryPartitions = queryPartitions;
+  }
+}

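For illustration only (not part of this change): a sketch of the hand-off this class mediates between the optimizer and an index handler, using only the accessors defined above. The method parameters stand in for the real objects that IndexWhereProcessor and CompactIndexHandler pass around later in this commit.

    import java.util.Set;

    import org.apache.hadoop.hive.metastore.api.Index;
    import org.apache.hadoop.hive.ql.index.HiveIndexHandler;
    import org.apache.hadoop.hive.ql.index.HiveIndexQueryContext;
    import org.apache.hadoop.hive.ql.metadata.Partition;
    import org.apache.hadoop.hive.ql.parse.ParseContext;
    import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
    import org.apache.hadoop.hive.ql.plan.MapredWork;

    public class IndexQueryContextExample {
      static void rewriteWithIndex(HiveIndexHandler handler, Index index, ExprNodeDesc predicate,
          ParseContext pctx, Set<Partition> queryPartitions, MapredWork work) {
        HiveIndexQueryContext queryContext = new HiveIndexQueryContext();
        queryContext.setQueryPartitions(queryPartitions);   // filled in by the optimizer side
        handler.generateIndexQuery(index, predicate, pctx, queryContext);
        if (queryContext.getQueryTasks() != null) {         // handler produced an index sub-query
          work.setInputformat(queryContext.getIndexInputFormat());
          work.setIndexIntermediateFile(queryContext.getIndexIntermediateFile());
        }
      }
    }
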
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java?rev=1098742&r1=1098741&r2=1098742&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java Mon May  2 19:10:42 2011
@@ -19,12 +19,12 @@
 package org.apache.hadoop.hive.ql.index.compact;
 
 import java.util.ArrayList;
-import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Set;
-import java.util.Map.Entry;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -33,23 +33,33 @@ import org.apache.hadoop.hive.metastore.
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.ql.Driver;
 import org.apache.hadoop.hive.ql.exec.Task;
-import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
-import org.apache.hadoop.hive.ql.index.TableBasedIndexHandler;
+import org.apache.hadoop.hive.ql.index.HiveIndexQueryContext;
 import org.apache.hadoop.hive.ql.index.IndexMetadataChangeTask;
 import org.apache.hadoop.hive.ql.index.IndexMetadataChangeWork;
+import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer;
+import org.apache.hadoop.hive.ql.index.IndexSearchCondition;
+import org.apache.hadoop.hive.ql.index.TableBasedIndexHandler;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.HiveUtils;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler.DecomposedPredicate;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
-import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;
 
 public class CompactIndexHandler extends TableBasedIndexHandler {
 
   private Configuration configuration;
+  private static final Log LOG = LogFactory.getLog(CompactIndexHandler.class.getName());
+
 
   @Override
   public void analyzeIndexDefinition(Table baseTable, Index index,
@@ -131,4 +141,123 @@ public class CompactIndexHandler extends
 
     return rootTask;
   }
+
+  @Override
+  public void generateIndexQuery(Index index, ExprNodeDesc predicate,
+    ParseContext pctx, HiveIndexQueryContext queryContext) {
+
+    DecomposedPredicate decomposedPredicate = decomposePredicate(predicate, index,
+                                                                  queryContext.getQueryPartitions());
+
+    if (decomposedPredicate == null) {
+      queryContext.setQueryTasks(null);
+      return; // abort if we couldn't pull out anything from the predicate
+    }
+
+    // pass residual predicate back out for further processing
+    queryContext.setResidualPredicate(decomposedPredicate.residualPredicate);
+
+    // Build reentrant QL for index query
+    StringBuilder qlCommand = new StringBuilder("INSERT OVERWRITE DIRECTORY ");
+
+    String tmpFile = pctx.getContext().getMRTmpFileURI();
+    qlCommand.append( "\"" + tmpFile + "\" ");            // QL includes " around file name
+    qlCommand.append("SELECT `_bucketname` ,  `_offsets` FROM ");
+    qlCommand.append(HiveUtils.unparseIdentifier(index.getIndexTableName()));
+    qlCommand.append(" WHERE ");
+
+    String predicateString = decomposedPredicate.pushedPredicate.getExprString();
+    qlCommand.append(predicateString);
+
+    // generate tasks from index query string
+    LOG.info("Generating tasks for re-entrant QL query: " + qlCommand.toString());
+    Driver driver = new Driver(pctx.getConf());
+    driver.compile(qlCommand.toString(), false);
+
+    // setup TableScanOperator to change input format for original query
+    queryContext.setIndexInputFormat(HiveCompactIndexInputFormat.class.getName());
+    queryContext.setIndexIntermediateFile(tmpFile);
+
+    queryContext.addAdditionalSemanticInputs(driver.getPlan().getInputs());
+    queryContext.setQueryTasks(driver.getPlan().getRootTasks());
+    return;
+  }
+
+  /**
+   * Split the predicate into the piece we can deal with (pushed), and the one we can't (residual)
+   * @param predicate
+   * @param index
+   * @return
+   */
+  private DecomposedPredicate decomposePredicate(ExprNodeDesc predicate, Index index,
+      Set<Partition> queryPartitions) {
+    IndexPredicateAnalyzer analyzer = getIndexPredicateAnalyzer(index, queryPartitions);
+    List<IndexSearchCondition> searchConditions = new ArrayList<IndexSearchCondition>();
+    // split predicate into pushed (what we can handle), and residual (what we can't handle)
+    ExprNodeDesc residualPredicate = analyzer.analyzePredicate(predicate, searchConditions);
+
+    if (searchConditions.size() == 0) {
+      return null;
+    }
+
+    DecomposedPredicate decomposedPredicate = new DecomposedPredicate();
+    decomposedPredicate.pushedPredicate = analyzer.translateSearchConditions(searchConditions);
+    decomposedPredicate.residualPredicate = residualPredicate;
+
+    return decomposedPredicate;
+  }
+
+  /**
+   * Instantiate a new predicate analyzer suitable for determining
+   * whether we can use an index, based on rules for indexes in
+   * WHERE clauses that we support
+   *
+   * @return preconfigured predicate analyzer for WHERE queries
+   */
+  private IndexPredicateAnalyzer getIndexPredicateAnalyzer(Index index, Set<Partition> queryPartitions)  {
+    IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
+
+    analyzer.addComparisonOp(GenericUDFOPEqual.class.getName());
+    analyzer.addComparisonOp(GenericUDFOPLessThan.class.getName());
+    analyzer.addComparisonOp(GenericUDFOPEqualOrLessThan.class.getName());
+    analyzer.addComparisonOp(GenericUDFOPGreaterThan.class.getName());
+    analyzer.addComparisonOp(GenericUDFOPEqualOrGreaterThan.class.getName());
+
+    // only return results for columns in this index
+    List<FieldSchema> columnSchemas = index.getSd().getCols();
+    for (FieldSchema column : columnSchemas) {
+      analyzer.allowColumnName(column.getName());
+    }
+
+    // partitioned columns are treated as if they have indexes so that the partitions
+    // are used during the index query generation
+    for (Partition part : queryPartitions) {
+      if (part.getSpec().isEmpty()) {
+        continue; // empty partitions are from whole tables, so we don't want to add them in
+      }
+      List<FieldSchema> partitionColumns = part.getCols();
+      for (FieldSchema column : partitionColumns) {
+        analyzer.allowColumnName(column.getName());
+      }
+    }
+
+    return analyzer;
+  }
+
+
+  @Override
+  public boolean checkQuerySize(long querySize, HiveConf hiveConf) {
+    long minSize = hiveConf.getLongVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER_COMPACT_MINSIZE);
+    long maxSize = hiveConf.getLongVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER_COMPACT_MAXSIZE);
+    if (maxSize < 0) {
+      maxSize = Long.MAX_VALUE;
+    }
+    return (querySize > minSize && querySize < maxSize);
+  }
+
+  @Override
+  public boolean usesIndexTable() {
+    return true;
+  }
+
 }

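For illustration only (not part of this change): with the default bounds added to HiveConf above (5G minimum, no maximum), checkQuerySize admits only sufficiently large inputs. A sketch assuming a default-constructed HiveConf and the handler's default constructor.

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler;

    public class CheckQuerySizeExample {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        CompactIndexHandler handler = new CompactIndexHandler();
        System.out.println(handler.checkQuerySize(1024L, conf));                   // false: below the 5G minsize
        System.out.println(handler.checkQuerySize(6L * 1024 * 1024 * 1024, conf)); // true: maxsize of -1 means no upper bound
      }
    }
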
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java?rev=1098742&r1=1098741&r2=1098742&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java Mon May  2 19:10:42 2011
@@ -38,6 +38,7 @@ import org.apache.hadoop.hive.metastore.
 import org.apache.hadoop.hive.metastore.ProtectMode;
 import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Index;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.metastore.api.SerDeInfo;
@@ -813,4 +814,13 @@ public class Table implements Serializab
   public String getCompleteName() {
     return getDbName() + "@" + getTableName();
   }
+
+  /**
+   * @return List of the indexes defined on this table (at most max entries)
+   * @throws HiveException
+   **/
+  public List<Index> getAllIndexes(short max) throws HiveException {
+    Hive hive = Hive.get();
+    return hive.getIndexes(getTTable().getDbName(), getTTable().getTableName(), max);
+  }
 };

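For illustration only (not part of this change): the new accessor is what IndexWhereTaskDispatcher below uses to discover indexes; a max of -1 returns all of them.

    import java.util.List;

    import org.apache.hadoop.hive.metastore.api.Index;
    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.hive.ql.metadata.Table;

    public class GetAllIndexesExample {
      static List<Index> indexesOn(Table tbl) throws HiveException {
        // -1: no limit on the number of indexes returned
        return tbl.getAllIndexes((short) -1);
      }
    }
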
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/IndexWhereResolver.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/IndexWhereResolver.java?rev=1098742&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/IndexWhereResolver.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/IndexWhereResolver.java Mon May  2 19:10:42 2011
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.physical;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.optimizer.physical.index.IndexWhereTaskDispatcher;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+
+public class IndexWhereResolver implements PhysicalPlanResolver {
+
+  @Override
+  public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticException {
+    Dispatcher dispatcher = new IndexWhereTaskDispatcher(physicalContext);
+    GraphWalker opGraphWalker = new DefaultGraphWalker(dispatcher);
+    ArrayList<Node> topNodes = new ArrayList<Node>();
+    topNodes.addAll(physicalContext.rootTasks);
+    opGraphWalker.startWalking(topNodes, null);
+
+    return physicalContext;
+  }
+}

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java?rev=1098742&r1=1098741&r2=1098742&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java Mon May  2 19:10:42 2011
@@ -52,6 +52,9 @@ public class PhysicalOptimizer {
     if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN)) {
       resolvers.add(new CommonJoinResolver());
     }
+    if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER)) {
+      resolvers.add(new IndexWhereResolver());
+    }
     resolvers.add(new MapJoinResolver());
   }
 

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcCtx.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcCtx.java?rev=1098742&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcCtx.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcCtx.java Mon May  2 19:10:42 2011
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.physical.index;
+
+import java.io.Serializable;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+
+public class IndexWhereProcCtx implements NodeProcessorCtx {
+
+  private static final Log LOG = LogFactory.getLog(IndexWhereProcCtx.class.getName());
+
+  private final Task<? extends Serializable> currentTask;
+  private final ParseContext parseCtx;
+
+  public IndexWhereProcCtx(Task<? extends Serializable> task, ParseContext parseCtx) {
+    this.currentTask = task;
+    this.parseCtx = parseCtx;
+  }
+
+  public ParseContext getParseContext() {
+    return parseCtx;
+  }
+
+  public Task<? extends Serializable> getCurrentTask() {
+    return currentTask;
+  }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java?rev=1098742&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java Mon May  2 19:10:42 2011
@@ -0,0 +1,287 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.physical.index;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Stack;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.ContentSummary;
+import org.apache.hadoop.hive.metastore.api.Index;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.index.HiveIndexHandler;
+import org.apache.hadoop.hive.ql.index.HiveIndexQueryContext;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.HiveUtils;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.FilterDesc;
+import org.apache.hadoop.hive.ql.plan.MapredWork;
+
+/**
+*
+* IndexWhereProcessor.
+* Processes Operator Nodes to look for WHERE queries with a predicate column
+* on which we have an index.  Creates an index subquery Task for these
+* WHERE queries to use the index automatically.
+*/
+public class IndexWhereProcessor implements NodeProcessor {
+
+  private static final Log LOG = LogFactory.getLog(IndexWhereProcessor.class.getName());
+  private final Map<Table, List<Index>> indexes;
+
+  public IndexWhereProcessor(Map<Table, List<Index>> indexes) {
+    super();
+    this.indexes = indexes;
+  }
+
+  @Override
+  /**
+   * Process a node of the operator tree.  This matches on the rule in IndexWhereTaskDispatcher
+   */
+  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+                        Object... nodeOutputs) throws SemanticException {
+
+    FilterOperator operator = (FilterOperator) nd;
+    FilterDesc operatorDesc = operator.getConf();
+    ExprNodeDesc predicate = operatorDesc.getPredicate();
+
+    IndexWhereProcCtx context = (IndexWhereProcCtx) procCtx;
+    ParseContext pctx = context.getParseContext();
+
+    // check if we have indexes on all partitions in this table scan
+    Set<Partition> queryPartitions;
+    try {
+      queryPartitions = checkPartitionsCoveredByIndex(operator, pctx);
+      if (queryPartitions == null) { // partitions not covered
+        return null;
+      }
+    } catch (HiveException e) {
+      LOG.error("Fatal Error: problem accessing metastore", e);
+      throw new SemanticException(e);
+    }
+
+    // we can only process MapReduce tasks to check input size
+    if (!context.getCurrentTask().isMapRedTask()) {
+      return null;
+    }
+    MapRedTask currentTask = (MapRedTask) context.getCurrentTask();
+
+    // get potential reentrant index queries from each index
+    Map<Index, HiveIndexQueryContext> queryContexts = new HashMap<Index, HiveIndexQueryContext>();
+    Collection<List<Index>> tableIndexes = indexes.values();
+    for (List<Index> indexesOnTable : tableIndexes) {
+      for (Index index : indexesOnTable) {
+        HiveIndexQueryContext queryContext = new HiveIndexQueryContext();
+        queryContext.setQueryPartitions(queryPartitions);
+        rewriteForIndex(predicate, index, pctx, currentTask, queryContext);
+        List<Task<?>> indexTasks = queryContext.getQueryTasks();
+
+        if (indexTasks != null && indexTasks.size() > 0) {
+          queryContexts.put(index, queryContext);
+        }
+      }
+    }
+
+    // choose an index rewrite to use
+    if (queryContexts.size() > 0) {
+      // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
+      Index chosenIndex = queryContexts.keySet().iterator().next();
+
+      // modify the parse context to use indexing
+      // we need to delay this until we choose one index so that we don't attempt to modify pctx multiple times
+      HiveIndexQueryContext queryContext = queryContexts.get(chosenIndex);
+
+      // prepare the map reduce job to use indexing
+      MapredWork work = currentTask.getWork();
+      work.setInputformat(queryContext.getIndexInputFormat());
+      work.setIndexIntermediateFile(queryContext.getIndexIntermediateFile());
+
+      // modify inputs based on index query
+      Set<ReadEntity> inputs = pctx.getSemanticInputs();
+      inputs.addAll(queryContext.getAdditionalSemanticInputs());
+
+      List<Task<?>> chosenRewrite = queryContext.getQueryTasks();
+
+      // add dependencies so index query runs first
+      insertIndexQuery(pctx, context, chosenRewrite);
+    }
+
+    return null;
+  }
+
+  /**
+   * Get a list of Tasks to activate use of indexes.
+   * Generate the tasks for the index query (where we store results of
+   * querying the index in a tmp file) inside the IndexHandler
+   * @param predicate Predicate of query to rewrite
+   * @param index Index to use for rewrite
+   * @param pctx
+   * @param task original task before rewrite
+   * @param queryContext stores return values
+   */
+  private void rewriteForIndex(ExprNodeDesc predicate, Index index,
+                                ParseContext pctx, Task<MapredWork> task,
+                                HiveIndexQueryContext queryContext)
+                                throws SemanticException {
+    HiveIndexHandler indexHandler;
+    try {
+      indexHandler = HiveUtils.getIndexHandler(pctx.getConf(), index.getIndexHandlerClass());
+    } catch (HiveException e) {
+      LOG.error("Exception while loading IndexHandler: " + index.getIndexHandlerClass(), e);
+      throw new SemanticException("Failed to load indexHandler: " + index.getIndexHandlerClass(), e);
+    }
+
+    // check the size
+    try {
+      ContentSummary inputSummary = Utilities.getInputSummary(pctx.getContext(), task.getWork(), null);
+      long inputSize = inputSummary.getLength();
+      if (!indexHandler.checkQuerySize(inputSize, pctx.getConf())) {
+        queryContext.setQueryTasks(null);
+        return;
+      }
+    } catch (IOException e) {
+      throw new SemanticException("Failed to get task size", e);
+    }
+
+    // use the IndexHandler to generate the index query
+    indexHandler.generateIndexQuery(index, predicate, pctx, queryContext);
+    // TODO HIVE-2115 use queryContext.residualPredicate to process residual predicate
+
+    return;
+  }
+
+
+
+  /**
+   * Check the partitions used by the table scan to make sure they also exist in the
+   * index table
+   * @param pctx
+   * @param operator
+   * @return partitions used by query.  null if they do not exist in index table
+   */
+  private Set<Partition> checkPartitionsCoveredByIndex(FilterOperator operator, ParseContext pctx)
+    throws HiveException {
+    TableScanOperator tableScan = (TableScanOperator) operator.getParentOperators().get(0);
+    Hive hive = Hive.get(pctx.getConf());
+
+    // make sure each partition exists on the index table
+    PrunedPartitionList queryPartitionList = pctx.getOpToPartList().get(tableScan);
+    Set<Partition> queryPartitions = queryPartitionList.getConfirmedPartns();
+
+    for (Partition part : queryPartitions) {
+      List<Table> sourceIndexTables = getIndexTables(hive, part);
+      if (!containsPartition(hive, sourceIndexTables, part)) {
+        return null; // problem if it doesn't contain the partition
+      }
+    }
+
+    return queryPartitions;
+  }
+
+  /**
+   * return index tables associated with the base table of the partition
+   */
+  private List<Table> getIndexTables(Hive hive, Partition part) throws HiveException {
+    List<Table> indexTables = new ArrayList<Table>();
+    Table partitionedTable = part.getTable();
+    for (Index index : indexes.get(partitionedTable)) {
+      indexTables.add(hive.getTable(index.getIndexTableName()));
+    }
+    return indexTables;
+  }
+
+  /**
+   * check that every index table contains the given partition
+   */
+  private boolean containsPartition(Hive hive, List<Table> indexTables, Partition part)
+    throws HiveException {
+    HashMap<String, String> partSpec = part.getSpec();
+
+    if (partSpec.isEmpty()) {
+      return true; // empty specs come from non-partitioned tables
+    }
+
+    for (Table indexTable : indexTables) {
+      // get partitions that match the spec
+      List<Partition> matchingPartitions = hive.getPartitions(indexTable, partSpec);
+      if (matchingPartitions == null || matchingPartitions.size() == 0) {
+        LOG.info("Index table " + indexTable + "did not contain built partition that matched " + partSpec);
+        return false;
+      }
+    }
+    return true;
+  }
+
+
+  /**
+   * Insert the rewrite tasks at the head of the pctx task tree
+   * @param pctx
+   * @param context
+   * @param chosenRewrite
+   */
+  private void insertIndexQuery(ParseContext pctx, IndexWhereProcCtx context, List<Task<?>> chosenRewrite) {
+    Task<?> wholeTableScan = context.getCurrentTask();
+    LinkedHashSet<Task<?>> rewriteLeaves = new LinkedHashSet<Task<?>>();
+    findLeaves(chosenRewrite, rewriteLeaves);
+
+    for (Task<?> leaf : rewriteLeaves) {
+      leaf.addDependentTask(wholeTableScan); // add full scan task as child for every index query task
+    }
+
+    // replace the original with the index sub-query as a root task
+    pctx.replaceRootTask(wholeTableScan, chosenRewrite);
+  }
+
+  /**
+   * Find the leaves of the task tree
+   */
+  private void findLeaves(List<Task<?>> tasks, Set<Task<?>> leaves) {
+    for (Task<?> t : tasks) {
+      if (t.getDependentTasks() == null) {
+        leaves.add(t);
+      } else {
+        findLeaves(t.getDependentTasks(), leaves);
+      }
+    }
+  }
+
+}
+

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java?rev=1098742&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java Mon May  2 19:10:42 2011
@@ -0,0 +1,165 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.physical.index;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import org.apache.hadoop.hive.metastore.api.Index;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+
+/**
+ *
+ * IndexWhereTaskDispatcher.  Walks a Task tree, and for the right kind of Task,
+ * walks the operator tree to create an index subquery.  Then attaches the
+ * subquery task to the task tree.
+ *
+ */
+public class IndexWhereTaskDispatcher implements Dispatcher {
+
+  private final PhysicalContext physicalContext;
+
+  public IndexWhereTaskDispatcher(PhysicalContext context) {
+    super();
+    physicalContext = context;
+  }
+
+  @Override
+  public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs)
+      throws SemanticException {
+
+    Task<? extends Serializable> task = (Task<? extends Serializable>) nd;
+
+    ParseContext pctx = physicalContext.getParseContext();
+
+    // create the regexes so the walker can recognize our WHERE queries
+    Map<Rule, NodeProcessor> operatorRules = createOperatorRules(pctx);
+
+    // check for no indexes on any table
+    if (operatorRules == null) {
+      return null;
+    }
+
+    // create context so the walker can carry the current task with it.
+    IndexWhereProcCtx indexWhereOptimizeCtx = new IndexWhereProcCtx(task, pctx);
+
+    // create the dispatcher, which fires the processor according to the rule that
+    // best matches
+    Dispatcher dispatcher = new DefaultRuleDispatcher(getDefaultProcessor(),
+                                                      operatorRules,
+                                                      indexWhereOptimizeCtx);
+
+    // walk the mapper operator (not task) tree
+    GraphWalker ogw = new DefaultGraphWalker(dispatcher);
+    ArrayList<Node> topNodes = new ArrayList<Node>();
+    topNodes.addAll(pctx.getTopOps().values());
+    ogw.startWalking(topNodes, null);
+
+    return null;
+  }
+
+  /**
+   * Create a set of rules that only matches WHERE predicates on columns we have
+   * an index on.
+   * @return
+   */
+  private Map<Rule, NodeProcessor> createOperatorRules(ParseContext pctx) throws SemanticException {
+    Map<Rule, NodeProcessor> operatorRules = new LinkedHashMap<Rule, NodeProcessor>();
+
+    List<String> supportedIndexes = new ArrayList<String>();
+    supportedIndexes.add(CompactIndexHandler.class.getName());
+
+    // query the metastore to know what columns we have indexed
+    Collection<Table> topTables = pctx.getTopToTable().values();
+    Map<Table, List<Index>> indexes = new HashMap<Table, List<Index>>();
+    for (Table tbl : topTables)
+    {
+      List<Index> tblIndexes = getIndexes(tbl, supportedIndexes);
+      if (tblIndexes.size() > 0) {
+        indexes.put(tbl, tblIndexes);
+      }
+    }
+
+    // quit if our tables don't have any indexes
+    if (indexes.size() == 0) {
+      return null;
+    }
+
+    // FIL% is a filter operator, a WHERE shows up as a filter on a table scan operator (TS%)
+    operatorRules.put(new RuleRegExp("RULEWhere", "TS%FIL%"), new IndexWhereProcessor(indexes));
+
+    return operatorRules;
+  }
+
+  /**
+   * Get a list of indexes on a table that match given types.
+   * Copied from HIVE-1694 patch
+   */
+  private List<Index> getIndexes(Table baseTableMetaData, List<String> matchIndexTypes)
+    throws SemanticException {
+    List<Index> matchingIndexes = new ArrayList<Index>();
+    List<Index> indexesOnTable = null;
+
+    try {
+      indexesOnTable = baseTableMetaData.getAllIndexes((short) -1); // get all indexes
+    } catch (HiveException e) {
+      throw new SemanticException("Error accessing metastore", e);
+    }
+
+    for (Index index : indexesOnTable) {
+      String indexType = index.getIndexHandlerClass();
+      if (matchIndexTypes.contains(indexType)) {
+        matchingIndexes.add(index);
+      }
+    }
+    return matchingIndexes;
+  }
+
+  private NodeProcessor getDefaultProcessor() {
+    return new NodeProcessor() {
+      @Override
+      public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+                            Object... nodeOutputs) throws SemanticException {
+        return null;
+      }
+    };
+  }
+
+}

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java?rev=1098742&r1=1098741&r2=1098742&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java Mon May  2 19:10:42 2011
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.parse;
 
 import java.io.Serializable;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -33,7 +34,9 @@ import org.apache.hadoop.hive.ql.exec.Jo
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.hooks.LineageInfo;
+import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
@@ -94,6 +97,9 @@ public class ParseContext {
 
   private SemanticAnalyzer.GlobalLimitCtx globalLimitCtx;
 
+  private HashSet<ReadEntity> semanticInputs;
+  private List<Task<? extends Serializable>> rootTasks;
+  
   public ParseContext() {
   }
 
@@ -129,6 +135,8 @@ public class ParseContext {
    *          list of map join operators with no reducer
    * @param opToSamplePruner
    *          operator to sample pruner map
+   * @param semanticInputs
+   * @param rootTasks
    */
   public ParseContext(
       HiveConf conf,
@@ -148,7 +156,8 @@ public class ParseContext {
       Map<String, PrunedPartitionList> prunedPartitions,
       HashMap<TableScanOperator, sampleDesc> opToSamplePruner,
       SemanticAnalyzer.GlobalLimitCtx globalLimitCtx,
-      HashMap<String, SplitSample> nameToSplitSample) {
+      HashMap<String, SplitSample> nameToSplitSample,
+      HashSet<ReadEntity> semanticInputs, List<Task<? extends Serializable>> rootTasks) {
     this.conf = conf;
     this.qb = qb;
     this.ast = ast;
@@ -173,6 +182,8 @@ public class ParseContext {
     this.opToSamplePruner = opToSamplePruner;
     this.nameToSplitSample = nameToSplitSample;
     this.globalLimitCtx = globalLimitCtx;
+    this.semanticInputs = semanticInputs;
+    this.rootTasks = rootTasks;
   }
 
   /**
@@ -509,4 +520,14 @@ public class ParseContext {
   public void setGlobalLimitCtx(SemanticAnalyzer.GlobalLimitCtx globalLimitCtx) {
     this.globalLimitCtx = globalLimitCtx;
   }
+
+  public HashSet<ReadEntity> getSemanticInputs() {
+    return semanticInputs;
+  }
+
+  public void replaceRootTask(Task<? extends Serializable> rootTask,
+                              List<? extends Task<? extends Serializable>> tasks) {
+    this.rootTasks.remove(rootTask);
+    this.rootTasks.addAll(tasks);
+  }
 }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1098742&r1=1098741&r2=1098742&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Mon May  2 19:10:42 2011
@@ -315,7 +315,7 @@ public class SemanticAnalyzer extends Ba
         topSelOps, opParseCtx, joinContext, topToTable, loadTableWork,
         loadFileWork, ctx, idToTableNameMap, destTableId, uCtx,
         listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions,
-        opToSamplePruner, globalLimitCtx, nameToSplitSample);
+        opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks);
   }
 
   @SuppressWarnings("nls")
@@ -6856,7 +6856,7 @@ public class SemanticAnalyzer extends Ba
         opToPartList, topOps, topSelOps, opParseCtx, joinContext, topToTable,
         loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx,
         listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions,
-        opToSamplePruner, globalLimitCtx, nameToSplitSample);
+        opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks);
 
     Optimizer optm = new Optimizer();
     optm.setPctx(pCtx);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java?rev=1098742&r1=1098741&r2=1098742&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java Mon May  2 19:10:42 2011
@@ -74,6 +74,7 @@ public class MapredWork implements Seria
 
   private MapredLocalWork mapLocalWork;
   private String inputformat;
+  private String indexIntermediateFile;
   private boolean gatheringStats;
 
   private String tmpHDFSFileURI;
@@ -380,6 +381,14 @@ public class MapredWork implements Seria
     this.inputformat = inputformat;
   }
 
+  public String getIndexIntermediateFile() {
+    return indexIntermediateFile;
+  }
+
+  public void setIndexIntermediateFile(String fileName) {
+    this.indexIntermediateFile = fileName;
+  }
+
   public void setGatheringStats(boolean gatherStats) {
     this.gatheringStats = gatherStats;
   }
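
MapredWork now carries an indexIntermediateFile slot, giving the plan a place to record where the index query wrote its bucketname/offset listing so that later job setup can point the input format at it. A hedged sketch of that wiring is below; apart from hive.index.compact.file, which appears in the index_auto.q test later in this commit, the helper names and the exact plumbing are assumptions rather than the committed implementation.

    import org.apache.hadoop.hive.ql.plan.MapredWork;
    import org.apache.hadoop.mapred.JobConf;

    // Illustrative only: one way the new MapredWork accessors could be used.
    public class IndexIntermediateFileSketch {

      // Planner side: remember where the index query will write its
      // <bucketname, offsets> output so later stages can find it.
      public static void recordIndexOutput(MapredWork work, String tmpDir) {
        work.setIndexIntermediateFile(tmpDir);
      }

      // Job-setup side: if the plan carries an intermediate index file,
      // hand it to the compact index input format via the job conf.
      public static void configureJob(MapredWork work, JobConf job) {
        String indexFile = work.getIndexIntermediateFile();
        if (indexFile != null) {
          job.set("hive.index.compact.file", indexFile);
        }
      }
    }

Keeping the path on the plan rather than in a static setting means each MapReduce task can be configured independently when several index-backed stages run in one query.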

Added: hive/trunk/ql/src/test/queries/clientpositive/index_auto.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_auto.q?rev=1098742&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_auto.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_auto.q Mon May  2 19:10:42 2011
@@ -0,0 +1,28 @@
+-- try the query without indexing, with manual indexing, and with automatic indexing
+
+-- without indexing
+SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key;
+
+CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD;
+ALTER INDEX src_index ON src REBUILD;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+
+-- manual indexing
+INSERT OVERWRITE DIRECTORY "/tmp/index_where" SELECT `_bucketname` ,  `_offsets` FROM default__src_src_index__ WHERE key > 80 AND key < 100;
+SET hive.index.compact.file=/tmp/index_where;
+SET hive.optimize.index.filter=false;
+SET hive.input.format=org.apache.hadoop.hive.ql.index.compact.HiveCompactIndexInputFormat;
+
+EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key;
+SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.filter.compact.minsize=0;
+
+-- automatic indexing
+EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key;
+SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key;
+
+DROP INDEX src_index on src;
\ No newline at end of file

Added: hive/trunk/ql/src/test/queries/clientpositive/index_auto_file_format.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_auto_file_format.q?rev=1098742&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_auto_file_format.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_auto_file_format.q Mon May  2 19:10:42 2011
@@ -0,0 +1,19 @@
+-- test automatic use of index on different file formats
+CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD;
+ALTER INDEX src_index ON src REBUILD;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.filter.compact.minsize=0;
+
+EXPLAIN SELECT key, value FROM src WHERE key=86 ORDER BY key;
+SELECT key, value FROM src WHERE key=86 ORDER BY key;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.filter.compact.minsize=0;
+
+EXPLAIN SELECT key, value FROM src WHERE key=86 ORDER BY key;
+SELECT key, value FROM src WHERE key=86 ORDER BY key;
+
+DROP INDEX src_index on src;
\ No newline at end of file

Added: hive/trunk/ql/src/test/queries/clientpositive/index_auto_multiple.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_auto_multiple.q?rev=1098742&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_auto_multiple.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_auto_multiple.q Mon May  2 19:10:42 2011
@@ -0,0 +1,16 @@
+-- With multiple indexes, make sure we choose which to use in a consistent order
+
+CREATE INDEX src_key_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD;
+CREATE INDEX src_val_index ON TABLE src(value) as 'COMPACT' WITH DEFERRED REBUILD;
+ALTER INDEX src_key_index ON src REBUILD;
+ALTER INDEX src_val_index ON src REBUILD;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.filter.compact.minsize=0;
+
+EXPLAIN SELECT key, value FROM src WHERE key=86 ORDER BY key;
+SELECT key, value FROM src WHERE key=86 ORDER BY key;
+
+DROP INDEX src_key_index ON src;
+DROP INDEX src_val_index ON src;
\ No newline at end of file

Added: hive/trunk/ql/src/test/queries/clientpositive/index_auto_partitioned.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_auto_partitioned.q?rev=1098742&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_auto_partitioned.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_auto_partitioned.q Mon May  2 19:10:42 2011
@@ -0,0 +1,12 @@
+-- test automatic use of index on table with partitions
+CREATE INDEX src_part_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD;
+ALTER INDEX src_part_index ON srcpart REBUILD;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.filter.compact.minsize=0;
+
+EXPLAIN SELECT key, value FROM srcpart WHERE key=86 ORDER BY key;
+SELECT key, value FROM srcpart WHERE key=86 ORDER BY key;
+
+DROP INDEX src_part_index ON srcpart;

Added: hive/trunk/ql/src/test/queries/clientpositive/index_auto_unused.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_auto_unused.q?rev=1098742&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_auto_unused.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_auto_unused.q Mon May  2 19:10:42 2011
@@ -0,0 +1,60 @@
+-- test cases where the index should not be used automatically
+
+CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD;
+ALTER INDEX src_index ON src REBUILD;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.filter.compact.minsize=5368709120;
+SET hive.optimize.index.filter.compact.maxsize=-1;
+
+-- min size too large (src is less than 5G)
+EXPLAIN SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key;
+SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.filter.compact.minsize=0;
+SET hive.optimize.index.filter.compact.maxsize=1;
+
+-- max size too small
+EXPLAIN SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key;
+SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.filter.compact.minsize=0;
+SET hive.optimize.index.filter.compact.maxsize=-1;
+
+-- OR predicate not supported by compact indexes
+EXPLAIN SELECT * FROM src WHERE key < 10 OR key > 480 ORDER BY key;
+SELECT * FROM src WHERE key < 10 OR key > 480 ORDER BY key;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.filter.compact.minsize=0;
+SET hive.optimize.index.filter.compact.maxsize=-1;
+
+-- columns are not covered by indexes
+DROP INDEX src_index on src;
+CREATE INDEX src_val_index ON TABLE src(value) as 'COMPACT' WITH DEFERRED REBUILD;
+ALTER INDEX src_val_index ON src REBUILD;
+
+EXPLAIN SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key;
+SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key;
+
+DROP INDEX src_val_index on src;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.filter.compact.minsize=0;
+SET hive.optimize.index.filter.compact.maxsize=-1;
+
+-- required partitions have not been built yet
+CREATE INDEX src_part_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD;
+ALTER INDEX src_part_index ON srcpart PARTITION (ds='2008-04-08', hr=11) REBUILD;
+
+EXPLAIN SELECT * FROM srcpart WHERE ds='2008-04-09' AND hr=12 AND key < 10 ORDER BY key;
+SELECT * FROM srcpart WHERE ds='2008-04-09' AND hr=12 AND key < 10 ORDER BY key;
+
+DROP INDEX src_part_index on srcpart;

Added: hive/trunk/ql/src/test/results/clientpositive/index_auto.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/index_auto.q.out?rev=1098742&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/index_auto.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/index_auto.q.out Mon May  2 19:10:42 2011
@@ -0,0 +1,313 @@
+PREHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing
+
+-- without indexing
+SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-48-49_890_8915143803732182055/-mr-10000
+POSTHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing
+
+-- without indexing
+SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-48-49_890_8915143803732182055/-mr-10000
+82	val_82
+83	val_83
+83	val_83
+84	val_84
+84	val_84
+85	val_85
+86	val_86
+87	val_87
+90	val_90
+90	val_90
+90	val_90
+92	val_92
+95	val_95
+95	val_95
+96	val_96
+97	val_97
+97	val_97
+98	val_98
+98	val_98
+PREHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD
+PREHOOK: type: CREATEINDEX
+POSTHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD
+POSTHOOK: type: CREATEINDEX
+PREHOOK: query: ALTER INDEX src_index ON src REBUILD
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@default__src_src_index__
+POSTHOOK: query: ALTER INDEX src_index ON src REBUILD
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@default__src_src_index__
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: -- manual indexing
+INSERT OVERWRITE DIRECTORY "/tmp/index_where" SELECT `_bucketname` ,  `_offsets` FROM default__src_src_index__ WHERE key > 80 AND key < 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@default__src_src_index__
+PREHOOK: Output: /tmp/index_where
+POSTHOOK: query: -- manual indexing
+INSERT OVERWRITE DIRECTORY "/tmp/index_where" SELECT `_bucketname` ,  `_offsets` FROM default__src_src_index__ WHERE key > 80 AND key < 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@default__src_src_index__
+POSTHOOK: Output: /tmp/index_where
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (AND (> (TOK_TABLE_OR_COL key) 80) (< (TOK_TABLE_OR_COL key) 100))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src 
+          TableScan
+            alias: src
+            Filter Operator
+              predicate:
+                  expr: ((key > 80) and (key < 100))
+                  type: boolean
+              Filter Operator
+                predicate:
+                    expr: ((key > 80) and (key < 100))
+                    type: boolean
+                Select Operator
+                  expressions:
+                        expr: key
+                        type: string
+                        expr: value
+                        type: string
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions:
+                          expr: _col0
+                          type: string
+                    sort order: +
+                    tag: -1
+                    value expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-49-24_225_5627614687777405478/-mr-10000
+POSTHOOK: query: SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-49-24_225_5627614687777405478/-mr-10000
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+82	val_82
+83	val_83
+83	val_83
+84	val_84
+84	val_84
+85	val_85
+86	val_86
+87	val_87
+90	val_90
+90	val_90
+90	val_90
+92	val_92
+95	val_95
+95	val_95
+96	val_96
+97	val_97
+97	val_97
+98	val_98
+98	val_98
+PREHOOK: query: -- automatic indexing
+EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- automatic indexing
+EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (AND (> (TOK_TABLE_OR_COL key) 80) (< (TOK_TABLE_OR_COL key) 100))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)))))
+
+STAGE DEPENDENCIES:
+  Stage-3 is a root stage
+  Stage-6 depends on stages: Stage-3 , consists of Stage-5, Stage-4
+  Stage-5
+  Stage-2 depends on stages: Stage-5, Stage-4
+  Stage-1 depends on stages: Stage-2
+  Stage-4
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+        default__src_src_index__ 
+          TableScan
+            alias: default__src_src_index__
+            Filter Operator
+              predicate:
+                  expr: ((key > 80) and (key < 100))
+                  type: boolean
+              Filter Operator
+                predicate:
+                    expr: ((key > 80) and (key < 100))
+                    type: boolean
+                Select Operator
+                  expressions:
+                        expr: _bucketname
+                        type: string
+                        expr: _offsets
+                        type: array<bigint>
+                  outputColumnNames: _col0, _col1
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 1
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-6
+    Conditional Operator
+
+  Stage: Stage-5
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/Users/rmelick/hive/build/ql/scratchdir/hive_2011-04-15_22-49-31_422_3293105246070967294/-ext-10000
+
+  Stage: Stage-2
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-49-31_020_3781399652075665616/-mr-10002
+
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src 
+          TableScan
+            alias: src
+            Filter Operator
+              predicate:
+                  expr: ((key > 80) and (key < 100))
+                  type: boolean
+              Filter Operator
+                predicate:
+                    expr: ((key > 80) and (key < 100))
+                    type: boolean
+                Select Operator
+                  expressions:
+                        expr: key
+                        type: string
+                        expr: value
+                        type: string
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions:
+                          expr: _col0
+                          type: string
+                    sort order: +
+                    tag: -1
+                    value expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-4
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/Users/rmelick/hive/build/ql/scratchdir/hive_2011-04-15_22-49-31_422_3293105246070967294/-ext-10001 
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@default__src_src_index__
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-49-31_665_4650797773210786014/-mr-10000
+POSTHOOK: query: SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@default__src_src_index__
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-49-31_665_4650797773210786014/-mr-10000
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+82	val_82
+83	val_83
+83	val_83
+84	val_84
+84	val_84
+85	val_85
+86	val_86
+87	val_87
+90	val_90
+90	val_90
+90	val_90
+92	val_92
+95	val_95
+95	val_95
+96	val_96
+97	val_97
+97	val_97
+98	val_98
+98	val_98
+PREHOOK: query: DROP INDEX src_index on src
+PREHOOK: type: DROPINDEX
+POSTHOOK: query: DROP INDEX src_index on src
+POSTHOOK: type: DROPINDEX
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]


