hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From j..@apache.org
Subject svn commit: r1150962 [1/2] - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/index/ java/org/apache/hadoop/hive/ql/index/bitmap/ java/org/apache/hadoop/hive/ql/index/compact/ java/org/apache/hadoop/hive/ql/optimizer/physical/index/ java/org/apache/hadoop/hive/ql/plan/ test/queries/clientpositive/ test/results/clientpositive/
Date Tue, 26 Jul 2011 00:12:39 GMT
Author: jvs
Date: Tue Jul 26 00:12:38 2011
New Revision: 1150962

URL: http://svn.apache.org/viewvc?rev=1150962&view=rev
Log:
HIVE-2128. Automatic Indexing with multiple tables.
(Syed Albiz via jvs)


Added:
    hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables.q
    hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables_compact.q
    hive/trunk/ql/src/test/queries/clientpositive/index_auto_self_join.q
    hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables.q.out
    hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out
    hive/trunk/ql/src/test/results/clientpositive/index_auto_self_join.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
    hive/trunk/ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java?rev=1150962&r1=1150961&r2=1150962&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java Tue Jul 26 00:12:38 2011
@@ -82,7 +82,7 @@ public class HiveIndexResult {
   BytesRefWritable[] bytesRef = new BytesRefWritable[2];
   boolean ignoreHdfsLoc = false;
 
-  public HiveIndexResult(String indexFile, JobConf conf) throws IOException,
+  public HiveIndexResult(List<String> indexFiles, JobConf conf) throws IOException,
       HiveException {
     job = conf;
 
@@ -90,18 +90,20 @@ public class HiveIndexResult {
     bytesRef[1] = new BytesRefWritable();
     ignoreHdfsLoc = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_INDEX_IGNORE_HDFS_LOC);
 
-    if (indexFile != null) {
-      Path indexFilePath = new Path(indexFile);
+    if (indexFiles != null && indexFiles.size() > 0) {
       FileSystem fs = FileSystem.get(conf);
-      FileStatus indexStat = fs.getFileStatus(indexFilePath);
       List<Path> paths = new ArrayList<Path>();
-      if (indexStat.isDir()) {
-        FileStatus[] fss = fs.listStatus(indexFilePath);
-        for (FileStatus f : fss) {
-          paths.add(f.getPath());
+      for (String indexFile : indexFiles) {
+        Path indexFilePath = new Path(indexFile);
+        FileStatus indexStat = fs.getFileStatus(indexFilePath);
+        if (indexStat.isDir()) {
+          FileStatus[] fss = fs.listStatus(indexFilePath);
+          for (FileStatus f : fss) {
+            paths.add(f.getPath());
+          }
+        } else {
+          paths.add(indexFilePath);
         }
-      } else {
-        paths.add(indexFilePath);
       }
 
       long maxEntriesToLoad = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVE_INDEX_COMPACT_QUERY_MAX_ENTRIES);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java?rev=1150962&r1=1150961&r2=1150962&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java Tue Jul 26 00:12:38 2011
@@ -20,12 +20,18 @@ package org.apache.hadoop.hive.ql.index;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.List;
 import java.util.Iterator;
 import java.util.Set;
+import java.util.Map;
+import java.util.Arrays;
+import java.util.HashMap;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -91,15 +97,27 @@ public class HiveIndexedInputFormat exte
     return result.toArray(new HiveInputSplit[result.size()]);
   }
 
+  public static List<String> getIndexFiles(String indexFileStr) {
+    // tokenize and store string of form (path,)+
+    if (indexFileStr == null) {
+      return null;
+    }
+    String[] chunks = indexFileStr.split(",");
+    return Arrays.asList(chunks);
+  }
+
   @Override
   public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
     String indexFileStr = job.get(indexFile);
     l4j.info("index_file is " + indexFileStr);
+    List<String> indexFiles = getIndexFiles(indexFileStr);
 
     HiveIndexResult hiveIndexResult = null;
-    if (indexFileStr != null) {
+    if (indexFiles != null) {
+      boolean first = true;
+      StringBuilder newInputPaths = new StringBuilder();
       try {
-        hiveIndexResult = new HiveIndexResult(indexFileStr, job);
+        hiveIndexResult = new HiveIndexResult(indexFiles, job);
       } catch (HiveException e) {
         l4j.error("Unable to read index..");
         throw new IOException(e);
@@ -107,8 +125,6 @@ public class HiveIndexedInputFormat exte
 
       Set<String> inputFiles = hiveIndexResult.buckets.keySet();
       Iterator<String> iter = inputFiles.iterator();
-      boolean first = true;
-      StringBuilder newInputPaths = new StringBuilder();
       while(iter.hasNext()) {
         String path = iter.next();
         if (path.trim().equalsIgnoreCase("")) {
@@ -121,7 +137,6 @@ public class HiveIndexedInputFormat exte
         }
         newInputPaths.append(path);
       }
-
       FileInputFormat.setInputPaths(job, newInputPaths.toString());
     } else {
       return super.getSplits(job, numSplits);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java?rev=1150962&r1=1150961&r2=1150962&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java Tue Jul 26 00:12:38 2011
@@ -81,14 +81,6 @@ public class BitmapIndexHandler extends 
       return; // abort if we couldn't pull out anything from the predicate
     }
 
-    // Build reentrant QL for index query
-    StringBuilder qlCommand = new StringBuilder("INSERT OVERWRITE DIRECTORY ");
-
-    String tmpFile = pctx.getContext().getMRTmpFileURI();
-    qlCommand.append( "\"" + tmpFile + "\" ");            // QL includes " around file name
-    qlCommand.append("SELECT bucketname AS `_bucketname` , COLLECT_SET(offset) AS `_offsets` FROM ");
-    qlCommand.append("(SELECT `_bucketname` AS bucketname , `_offset` AS offset FROM ");
-
     List<BitmapInnerQuery> iqs = new ArrayList<BitmapInnerQuery>(indexes.size());
     int i = 0;
     for (Index index : indexes) {
@@ -100,6 +92,17 @@ public class BitmapIndexHandler extends 
               "ind" + i++));
       }
     }
+    // setup TableScanOperator to change input format for original query
+    queryContext.setIndexInputFormat(HiveIndexedInputFormat.class.getName());
+
+    // Build reentrant QL for index query
+    StringBuilder qlCommand = new StringBuilder("INSERT OVERWRITE DIRECTORY ");
+
+    String tmpFile = pctx.getContext().getMRTmpFileURI();
+    qlCommand.append( "\"" + tmpFile + "\" ");            // QL includes " around file name
+    qlCommand.append("SELECT bucketname AS `_bucketname` , COLLECT_SET(offset) AS `_offsets` FROM ");
+    qlCommand.append("(SELECT `_bucketname` AS bucketname , `_offset` AS offset FROM ");
+
 
     BitmapQuery head = iqs.get(0);
     for ( i = 1; i < iqs.size(); i++) {
@@ -113,10 +116,7 @@ public class BitmapIndexHandler extends 
     Driver driver = new Driver(pctx.getConf());
     driver.compile(qlCommand.toString(), false);
 
-    // setup TableScanOperator to change input format for original query
-    queryContext.setIndexInputFormat(HiveIndexedInputFormat.class.getName());
     queryContext.setIndexIntermediateFile(tmpFile);
-
     queryContext.addAdditionalSemanticInputs(driver.getPlan().getInputs());
     queryContext.setQueryTasks(driver.getPlan().getRootTasks());
   }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java?rev=1150962&r1=1150961&r2=1150962&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java Tue Jul 26 00:12:38 2011
@@ -161,11 +161,14 @@ public class CompactIndexHandler extends
 
     // pass residual predicate back out for further processing
     queryContext.setResidualPredicate(decomposedPredicate.residualPredicate);
+    // setup TableScanOperator to change input format for original query
+    queryContext.setIndexInputFormat(HiveCompactIndexInputFormat.class.getName());
 
     // Build reentrant QL for index query
     StringBuilder qlCommand = new StringBuilder("INSERT OVERWRITE DIRECTORY ");
 
     String tmpFile = pctx.getContext().getMRTmpFileURI();
+    queryContext.setIndexIntermediateFile(tmpFile);
     qlCommand.append( "\"" + tmpFile + "\" ");            // QL includes " around file name
     qlCommand.append("SELECT `_bucketname` ,  `_offsets` FROM ");
     qlCommand.append(HiveUtils.unparseIdentifier(index.getIndexTableName()));
@@ -179,9 +182,6 @@ public class CompactIndexHandler extends
     Driver driver = new Driver(pctx.getConf());
     driver.compile(qlCommand.toString(), false);
 
-    // setup TableScanOperator to change input format for original query
-    queryContext.setIndexInputFormat(HiveCompactIndexInputFormat.class.getName());
-    queryContext.setIndexIntermediateFile(tmpFile);
 
     queryContext.addAdditionalSemanticInputs(driver.getPlan().getInputs());
     queryContext.setQueryTasks(driver.getPlan().getRootTasks());

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java?rev=1150962&r1=1150961&r2=1150962&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java Tue Jul 26 00:12:38 2011
@@ -53,6 +53,7 @@ import org.apache.hadoop.hive.ql.parse.P
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.FilterDesc;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.ql.plan.MapredWork;
 
@@ -83,11 +84,15 @@ public class IndexWhereProcessor impleme
     TableScanOperator operator = (TableScanOperator) nd;
     List<Node> opChildren = operator.getChildren();
     TableScanDesc operatorDesc = operator.getConf();
+    if (operatorDesc == null) {
+      return null;
+    }
     ExprNodeDesc predicate = operatorDesc.getFilterExpr();
 
     IndexWhereProcCtx context = (IndexWhereProcCtx) procCtx;
     ParseContext pctx = context.getParseContext();
     LOG.info("Processing predicate for index optimization");
+
     if (predicate == null) {
       LOG.info("null predicate pushed down");
       return null;
@@ -114,47 +119,42 @@ public class IndexWhereProcessor impleme
 
     // get potential reentrant index queries from each index
     Map<Index, HiveIndexQueryContext> queryContexts = new HashMap<Index, HiveIndexQueryContext>();
-    Collection<List<Index>> tableIndexes = indexes.values();
-    for (List<Index> indexesOnTable : tableIndexes) {
-      List<List<Index>> indexesByType = new ArrayList<List<Index>>();
-      for (Index index : indexesOnTable) {
-        boolean added = false;
-        for (List<Index> indexType : indexesByType) {
-          if (indexType.isEmpty()) {
-            indexType.add(index);
-            added = true;
-          } else if (indexType.get(0).getIndexHandlerClass().equals(
-                index.getIndexHandlerClass())) {
-            indexType.add(index);
-            added = true;
-            break;
-          }
-        }
-        if (!added) {
-          List<Index> newType = new ArrayList<Index>();
-          newType.add(index);
-          indexesByType.add(newType);
-        }
+    // make sure we have an index on the table being scanned
+    TableDesc tblDesc = operator.getTableDesc();
+    Table srcTable = pctx.getTopToTable().get(operator);
+    if (indexes == null || indexes.get(srcTable) == null) {
+      return null;
+    }
+
+    List<Index> tableIndexes = indexes.get(srcTable);
+    Map<String, List<Index>> indexesByType = new HashMap<String, List<Index>>();
+    for (Index indexOnTable : tableIndexes) {
+      if (indexesByType.get(indexOnTable.getIndexHandlerClass()) == null) {
+        List<Index> newType = new ArrayList<Index>();
+        newType.add(indexOnTable);
+        indexesByType.put(indexOnTable.getIndexHandlerClass(), newType);
+      } else {
+        indexesByType.get(indexOnTable.getIndexHandlerClass()).add(indexOnTable);
       }
+    }
 
-      // choose index type with most indexes of the same type on the table
-      // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
-      List<Index> bestIndexes = indexesByType.get(0);
-      for (List<Index> indexTypes : indexesByType) {
-        if (bestIndexes.size() < indexTypes.size()) {
-          bestIndexes = indexTypes;
-        }
+    // choose index type with most indexes of the same type on the table
+    // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
+    List<Index> bestIndexes = indexesByType.values().iterator().next();
+    for (List<Index> indexTypes : indexesByType.values()) {
+      if (bestIndexes.size() < indexTypes.size()) {
+        bestIndexes = indexTypes;
       }
+    }
 
-      // rewrite index queries for the chosen index type
-      HiveIndexQueryContext queryContext = new HiveIndexQueryContext();
-      queryContext.setQueryPartitions(queryPartitions);
-      rewriteForIndexes(predicate, bestIndexes, pctx, currentTask, queryContext);
-      List<Task<?>> indexTasks = queryContext.getQueryTasks();
+    // rewrite index queries for the chosen index type
+    HiveIndexQueryContext tmpQueryContext = new HiveIndexQueryContext();
+    tmpQueryContext.setQueryPartitions(queryPartitions);
+    rewriteForIndexes(predicate, bestIndexes, pctx, currentTask, tmpQueryContext);
+    List<Task<?>> indexTasks = tmpQueryContext.getQueryTasks();
 
-      if (indexTasks != null && indexTasks.size() > 0) {
-        queryContexts.put(bestIndexes.get(0), queryContext);
-      }
+    if (indexTasks != null && indexTasks.size() > 0) {
+      queryContexts.put(bestIndexes.get(0), tmpQueryContext);
     }
     // choose an index rewrite to use
     if (queryContexts.size() > 0) {
@@ -168,8 +168,7 @@ public class IndexWhereProcessor impleme
       // prepare the map reduce job to use indexing
       MapredWork work = currentTask.getWork();
       work.setInputformat(queryContext.getIndexInputFormat());
-      work.setIndexIntermediateFile(queryContext.getIndexIntermediateFile());
-
+      work.addIndexIntermediateFile(queryContext.getIndexIntermediateFile());
       // modify inputs based on index query
       Set<ReadEntity> inputs = pctx.getSemanticInputs();
       inputs.addAll(queryContext.getAdditionalSemanticInputs());
@@ -226,8 +225,6 @@ public class IndexWhereProcessor impleme
     return;
   }
 
-
-
   /**
    * Check the partitions used by the table scan to make sure they also exist in the
    * index table
@@ -239,6 +236,7 @@ public class IndexWhereProcessor impleme
     throws HiveException {
     Hive hive = Hive.get(pctx.getConf());
 
+
     // make sure each partition exists on the index table
     PrunedPartitionList queryPartitionList = pctx.getOpToPartList().get(tableScan);
     Set<Partition> queryPartitions = queryPartitionList.getConfirmedPartns();
@@ -259,6 +257,9 @@ public class IndexWhereProcessor impleme
   private List<Table> getIndexTables(Hive hive, Partition part) throws HiveException {
     List<Table> indexTables = new ArrayList<Table>();
     Table partitionedTable = part.getTable();
+    if (indexes == null || indexes.get(partitionedTable) == null) {
+      return indexTables;
+    }
     for (Index index : indexes.get(partitionedTable)) {
       indexTables.add(hive.getTable(index.getIndexTableName()));
     }
@@ -276,6 +277,10 @@ public class IndexWhereProcessor impleme
       return true; // empty specs come from non-partitioned tables
     }
 
+    if (indexTables == null || indexTables.size() == 0) {
+      return false;
+    }
+
     for (Table indexTable : indexTables) {
       // get partitions that match the spec
       List<Partition> matchingPartitions = hive.getPartitions(indexTable, partSpec);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java?rev=1150962&r1=1150961&r2=1150962&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java Tue Jul 26 00:12:38 2011
@@ -45,6 +45,7 @@ import org.apache.hadoop.hive.ql.metadat
 import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.MapredWork;
 
 /**
  *
@@ -87,10 +88,14 @@ public class IndexWhereTaskDispatcher im
                                                       operatorRules,
                                                       indexWhereOptimizeCtx);
 
-    // walk the mapper operator(not task) tree
+    // walk the mapper operator(not task) tree for each specific task
     GraphWalker ogw = new DefaultGraphWalker(dispatcher);
     ArrayList<Node> topNodes = new ArrayList<Node>();
-    topNodes.addAll(pctx.getTopOps().values());
+    if (task.getWork() instanceof MapredWork) {
+      topNodes.addAll(((MapredWork)task.getWork()).getAliasToWork().values());
+    } else {
+      return null;
+    }
     ogw.startWalking(topNodes, null);
 
     return null;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java?rev=1150962&r1=1150961&r2=1150962&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java Tue Jul 26 00:12:38 2011
@@ -387,8 +387,12 @@ public class MapredWork implements Seria
     return indexIntermediateFile;
   }
 
-  public void setIndexIntermediateFile(String fileName) {
-    this.indexIntermediateFile = fileName;
+  public void addIndexIntermediateFile(String fileName) {
+    if (this.indexIntermediateFile == null) {
+      this.indexIntermediateFile = fileName;
+    } else {
+      this.indexIntermediateFile += "," + fileName;
+    }
   }
 
   public void setGatheringStats(boolean gatherStats) {

Added: hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables.q?rev=1150962&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables.q Tue Jul 26 00:12:38 2011
@@ -0,0 +1,23 @@
+-- try the query without indexing, with manual indexing, and with automatic indexing
+
+-- without indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+
+
+CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD;
+ALTER INDEX src_index ON src REBUILD;
+
+CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD;
+ALTER INDEX srcpart_index ON srcpart REBUILD;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.filter.compact.minsize=0;
+
+-- automatic indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+
+DROP INDEX src_index on src;
+DROP INDEX srcpart_index on src;

Added: hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables_compact.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables_compact.q?rev=1150962&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables_compact.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables_compact.q Tue Jul 26 00:12:38 2011
@@ -0,0 +1,23 @@
+-- try the query without indexing, with manual indexing, and with automatic indexing
+
+-- without indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+
+
+CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD;
+ALTER INDEX src_index ON src REBUILD;
+
+CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD;
+ALTER INDEX srcpart_index ON srcpart REBUILD;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.filter.compact.minsize=0;
+
+-- automatic indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+
+DROP INDEX src_index on src;
+DROP INDEX srcpart_index on src;

Added: hive/trunk/ql/src/test/queries/clientpositive/index_auto_self_join.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_auto_self_join.q?rev=1150962&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_auto_self_join.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_auto_self_join.q Tue Jul 26 00:12:38 2011
@@ -0,0 +1,18 @@
+-- try the query without indexing, with manual indexing, and with automatic indexing
+
+-- without indexing
+EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+
+CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD;
+ALTER INDEX src_index ON src REBUILD;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.filter.compact.minsize=0;
+
+-- automatic indexing
+EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+
+DROP INDEX src_index on src;

Added: hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables.q.out?rev=1150962&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables.q.out Tue Jul 26 00:12:38 2011
@@ -0,0 +1,638 @@
+PREHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing
+
+-- without indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing
+
+-- without indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Filter Operator
+              predicate:
+                  expr: ((key > 80) and (key < 100))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 0
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+        b 
+          TableScan
+            alias: b
+            Filter Operator
+              predicate:
+                  expr: ((key > 70) and (key < 90))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 1
+                value expressions:
+                      expr: key
+                      type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col0}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col4
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-49-01_232_8620953468013110737/-mr-10002 
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              tag: -1
+              value expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-49-01_720_2821092727071549075/-mr-10000
+POSTHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-49-01_720_2821092727071549075/-mr-10000
+82	val_82
+82	val_82
+82	val_82
+82	val_82
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+85	val_85
+85	val_85
+85	val_85
+85	val_85
+86	val_86
+86	val_86
+86	val_86
+86	val_86
+87	val_87
+87	val_87
+87	val_87
+87	val_87
+PREHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD
+PREHOOK: type: CREATEINDEX
+POSTHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD
+POSTHOOK: type: CREATEINDEX
+PREHOOK: query: ALTER INDEX src_index ON src REBUILD
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@default__src_src_index__
+POSTHOOK: query: ALTER INDEX src_index ON src REBUILD
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@default__src_src_index__
+POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD
+PREHOOK: type: CREATEINDEX
+POSTHOOK: query: CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD
+POSTHOOK: type: CREATEINDEX
+POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: ALTER INDEX srcpart_index ON srcpart REBUILD
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
+PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
+PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
+PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
+POSTHOOK: query: ALTER INDEX srcpart_index ON srcpart REBUILD
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
+POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
+POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
+POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: -- automatic indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- automatic indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)))))
+
+STAGE DEPENDENCIES:
+  Stage-5 is a root stage
+  Stage-4 depends on stages: Stage-5
+  Stage-1 depends on stages: Stage-4, Stage-6
+  Stage-2 depends on stages: Stage-1
+  Stage-7 is a root stage
+  Stage-6 depends on stages: Stage-7
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-5
+    Map Reduce
+      Alias -> Map Operator Tree:
+        tmp_index:ind0:default__srcpart_srcpart_index__ 
+          TableScan
+            alias: default__srcpart_srcpart_index__
+            filterExpr:
+                expr: (((key > 70) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps)))
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: (((key > 70) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps)))
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: _bucketname
+                      type: string
+                      expr: _offset
+                      type: bigint
+                      expr: _bitmaps
+                      type: array<bigint>
+                outputColumnNames: _col1, _col2, _col3
+                Select Operator
+                  expressions:
+                        expr: _col1
+                        type: string
+                        expr: _col2
+                        type: bigint
+                  outputColumnNames: _col0, _col1
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: bigint
+                    outputColumnNames: _col0, _col1
+                    Group By Operator
+                      aggregations:
+                            expr: collect_set(_col1)
+                      bucketGroup: false
+                      keys:
+                            expr: _col0
+                            type: string
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Reduce Output Operator
+                        key expressions:
+                              expr: _col0
+                              type: string
+                        sort order: +
+                        Map-reduce partition columns:
+                              expr: _col0
+                              type: string
+                        tag: -1
+                        value expressions:
+                              expr: _col1
+                              type: array<bigint>
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: collect_set(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: array<bigint>
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 1
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-4
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-02_560_7042516595035703988/-mr-10003
+
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            filterExpr:
+                expr: ((key > 80) and (key < 100))
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: ((key > 80) and (key < 100))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 0
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+        b 
+          TableScan
+            alias: b
+            filterExpr:
+                expr: ((key > 70) and (key < 90))
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: ((key > 70) and (key < 90))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 1
+                value expressions:
+                      expr: key
+                      type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col0}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col4
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-02_560_7042516595035703988/-mr-10002 
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              tag: -1
+              value expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-7
+    Map Reduce
+      Alias -> Map Operator Tree:
+        tmp_index:ind0:default__src_src_index__ 
+          TableScan
+            alias: default__src_src_index__
+            filterExpr:
+                expr: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps)))
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps)))
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: _bucketname
+                      type: string
+                      expr: _offset
+                      type: bigint
+                      expr: _bitmaps
+                      type: array<bigint>
+                outputColumnNames: _col1, _col2, _col3
+                Select Operator
+                  expressions:
+                        expr: _col1
+                        type: string
+                        expr: _col2
+                        type: bigint
+                  outputColumnNames: _col0, _col1
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: bigint
+                    outputColumnNames: _col0, _col1
+                    Group By Operator
+                      aggregations:
+                            expr: collect_set(_col1)
+                      bucketGroup: false
+                      keys:
+                            expr: _col0
+                            type: string
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Reduce Output Operator
+                        key expressions:
+                              expr: _col0
+                              type: string
+                        sort order: +
+                        Map-reduce partition columns:
+                              expr: _col0
+                              type: string
+                        tag: -1
+                        value expressions:
+                              expr: _col1
+                              type: array<bigint>
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: collect_set(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: array<bigint>
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 1
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-6
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-02_560_7042516595035703988/-mr-10004
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@default__src_src_index__
+PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
+PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
+PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
+PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
+PREHOOK: Input: default@src
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-03_417_3979280982191225396/-mr-10000
+POSTHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@default__src_src_index__
+POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-03_417_3979280982191225396/-mr-10000
+POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+82	val_82
+82	val_82
+82	val_82
+82	val_82
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+85	val_85
+85	val_85
+85	val_85
+85	val_85
+86	val_86
+86	val_86
+86	val_86
+86	val_86
+87	val_87
+87	val_87
+87	val_87
+87	val_87
+PREHOOK: query: DROP INDEX src_index on src
+PREHOOK: type: DROPINDEX
+POSTHOOK: query: DROP INDEX src_index on src
+POSTHOOK: type: DROPINDEX
+POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DROP INDEX srcpart_index on src
+PREHOOK: type: DROPINDEX
+POSTHOOK: query: DROP INDEX srcpart_index on src
+POSTHOOK: type: DROPINDEX
+POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]

Added: hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out?rev=1150962&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out Tue Jul 26 00:12:38 2011
@@ -0,0 +1,564 @@
+PREHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing
+
+-- without indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing
+
+-- without indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Filter Operator
+              predicate:
+                  expr: ((key > 80) and (key < 100))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 0
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+        b 
+          TableScan
+            alias: b
+            Filter Operator
+              predicate:
+                  expr: ((key > 70) and (key < 90))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 1
+                value expressions:
+                      expr: key
+                      type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col0}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col4
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-36_433_5542638095321427981/-mr-10002 
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              tag: -1
+              value expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-36_581_3747634705901315299/-mr-10000
+POSTHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-36_581_3747634705901315299/-mr-10000
+82	val_82
+82	val_82
+82	val_82
+82	val_82
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+85	val_85
+85	val_85
+85	val_85
+85	val_85
+86	val_86
+86	val_86
+86	val_86
+86	val_86
+87	val_87
+87	val_87
+87	val_87
+87	val_87
+PREHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD
+PREHOOK: type: CREATEINDEX
+POSTHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD
+POSTHOOK: type: CREATEINDEX
+PREHOOK: query: ALTER INDEX src_index ON src REBUILD
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@default__src_src_index__
+POSTHOOK: query: ALTER INDEX src_index ON src REBUILD
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@default__src_src_index__
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD
+PREHOOK: type: CREATEINDEX
+POSTHOOK: query: CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD
+POSTHOOK: type: CREATEINDEX
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: ALTER INDEX srcpart_index ON srcpart REBUILD
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
+PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
+PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
+PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
+POSTHOOK: query: ALTER INDEX srcpart_index ON srcpart REBUILD
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
+POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
+POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: -- automatic indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- automatic indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)))))
+
+STAGE DEPENDENCIES:
+  Stage-5 is a root stage
+  Stage-8 depends on stages: Stage-5 , consists of Stage-7, Stage-6
+  Stage-7
+  Stage-4 depends on stages: Stage-7, Stage-6
+  Stage-1 depends on stages: Stage-4, Stage-9
+  Stage-2 depends on stages: Stage-1
+  Stage-6
+  Stage-10 is a root stage
+  Stage-13 depends on stages: Stage-10 , consists of Stage-12, Stage-11
+  Stage-12
+  Stage-9 depends on stages: Stage-12, Stage-11
+  Stage-11
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-5
+    Map Reduce
+      Alias -> Map Operator Tree:
+        default__srcpart_srcpart_index__ 
+          TableScan
+            alias: default__srcpart_srcpart_index__
+            filterExpr:
+                expr: ((key > 70) and (key < 90))
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: ((key > 70) and (key < 90))
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: _bucketname
+                      type: string
+                      expr: _offsets
+                      type: array<bigint>
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 1
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-8
+    Conditional Operator
+
+  Stage: Stage-7
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-07-25_11-51-35_494_7032523871014076058/-ext-10000
+
+  Stage: Stage-4
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-51-34_909_919679217594371878/-mr-10003
+
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            filterExpr:
+                expr: ((key > 80) and (key < 100))
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: ((key > 80) and (key < 100))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 0
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+        b 
+          TableScan
+            alias: b
+            filterExpr:
+                expr: ((key > 70) and (key < 90))
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: ((key > 70) and (key < 90))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 1
+                value expressions:
+                      expr: key
+                      type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col0}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col4
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-51-34_909_919679217594371878/-mr-10002 
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              tag: -1
+              value expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-6
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-07-25_11-51-35_494_7032523871014076058/-ext-10001 
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-10
+    Map Reduce
+      Alias -> Map Operator Tree:
+        default__src_src_index__ 
+          TableScan
+            alias: default__src_src_index__
+            filterExpr:
+                expr: ((key > 80) and (key < 100))
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: ((key > 80) and (key < 100))
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: _bucketname
+                      type: string
+                      expr: _offsets
+                      type: array<bigint>
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 1
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-13
+    Conditional Operator
+
+  Stage: Stage-12
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-07-25_11-51-35_590_77690777312722941/-ext-10000
+
+  Stage: Stage-9
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-51-34_909_919679217594371878/-mr-10004
+
+  Stage: Stage-11
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-07-25_11-51-35_590_77690777312722941/-ext-10001 
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@default__src_src_index__
+PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
+PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
+PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
+PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
+PREHOOK: Input: default@src
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-51-35_695_2966341580180350713/-mr-10000
+POSTHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@default__src_src_index__
+POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-51-35_695_2966341580180350713/-mr-10000
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+82	val_82
+82	val_82
+82	val_82
+82	val_82
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+85	val_85
+85	val_85
+85	val_85
+85	val_85
+86	val_86
+86	val_86
+86	val_86
+86	val_86
+87	val_87
+87	val_87
+87	val_87
+87	val_87
+PREHOOK: query: DROP INDEX src_index on src
+PREHOOK: type: DROPINDEX
+POSTHOOK: query: DROP INDEX src_index on src
+POSTHOOK: type: DROPINDEX
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DROP INDEX srcpart_index on src
+PREHOOK: type: DROPINDEX
+POSTHOOK: query: DROP INDEX srcpart_index on src
+POSTHOOK: type: DROPINDEX
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]



Mime
View raw message