hive-commits mailing list archives

From: na...@apache.org
Subject: svn commit: r1199117 - in /hive/trunk: data/files/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/ ql/src/java/org/apache/hadoop/hive/ql/plan/ ql/src...
Date: Tue, 08 Nov 2011 05:53:00 GMT
Author: namit
Date: Tue Nov  8 05:52:59 2011
New Revision: 1199117

URL: http://svn.apache.org/viewvc?rev=1199117&view=rev
Log:
HIVE-2466 mapjoin_subquery dump small table (mapjoin table) to the same file
(binlijin via namit)
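
[Editor's note] The fix threads a per-join "dumpFilePrefix" into the hashtable dump path, so each map join in a query writes its small table to a distinct file. A minimal standalone sketch of the before/after path construction (not Hive code; the class name, base path, and prefix values are illustrative):

    public class DumpPathCollision {
      static String oldPath(String baseURI, byte tag, String bigBucketFileName) {
        // Pre-patch: the path depends only on the table tag, so two map joins
        // whose small tables carry the same tag collide on one file.
        return baseURI + "/MapJoin-" + tag + "-" + bigBucketFileName + ".hashtable";
      }

      static String newPath(String baseURI, String dumpFilePrefix, byte tag,
          String bigBucketFileName) {
        // Post-patch: a per-join prefix (built from the small-table aliases
        // plus a counter) keeps the paths distinct.
        return baseURI + "/MapJoin-" + dumpFilePrefix + tag + "-"
            + bigBucketFileName + ".hashtable";
      }

      public static void main(String[] args) {
        String base = "/tmp/hive-local";
        // Two different map joins, same tag: identical paths before the fix.
        System.out.println(oldPath(base, (byte) 1, "-")); // /tmp/hive-local/MapJoin-1--.hashtable
        System.out.println(oldPath(base, (byte) 1, "-")); // same file again -> one dump clobbers the other
        // With distinct prefixes the two dumps land in different files.
        System.out.println(newPath(base, "x-0", (byte) 1, "-"));
        System.out.println(newPath(base, "z-1", (byte) 1, "-"));
      }
    }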


Added:
    hive/trunk/data/files/x.txt
    hive/trunk/data/files/y.txt
    hive/trunk/data/files/z.txt
    hive/trunk/ql/src/test/queries/clientpositive/mapjoin_subquery2.q
    hive/trunk/ql/src/test/results/clientpositive/mapjoin_subquery2.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapredLocalTask.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java

Added: hive/trunk/data/files/x.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/x.txt?rev=1199117&view=auto
==============================================================================
--- hive/trunk/data/files/x.txt (added)
+++ hive/trunk/data/files/x.txt Tue Nov  8 05:52:59 2011
@@ -0,0 +1,2 @@
+Joe	2
+Hank	2

Added: hive/trunk/data/files/y.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/y.txt?rev=1199117&view=auto
==============================================================================
--- hive/trunk/data/files/y.txt (added)
+++ hive/trunk/data/files/y.txt Tue Nov  8 05:52:59 2011
@@ -0,0 +1 @@
+2	Tie

Added: hive/trunk/data/files/z.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/z.txt?rev=1199117&view=auto
==============================================================================
--- hive/trunk/data/files/z.txt (added)
+++ hive/trunk/data/files/z.txt Tue Nov  8 05:52:59 2011
@@ -0,0 +1 @@
+2	Tie

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java?rev=1199117&r1=1199116&r2=1199117&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java Tue Nov  8 05:52:59 2011
@@ -48,6 +48,7 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
 import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.util.ReflectionUtils;
 
@@ -403,7 +404,7 @@ public class HashTableSinkOperator exten
             bigBucketFileName = "-";
           }
           // get the tmp URI path; it will be a hdfs path if not local mode
-          String tmpURIPath = Utilities.generatePath(tmpURI, tag, bigBucketFileName);
+          String tmpURIPath = Utilities.generatePath(tmpURI, conf.getDumpFilePrefix(), tag, bigBucketFileName);
           hashTable.isAbort(rowNumber, console);
           console.printInfo(Utilities.now() + "\tDump the hashtable into file: " + tmpURIPath);
           // get the hashtable file and path

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java?rev=1199117&r1=1199116&r2=1199117&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java Tue Nov  8 05:52:59 2011
@@ -188,7 +188,7 @@ public class MapJoinOperator extends Abs
           .entrySet()) {
         Byte pos = entry.getKey();
         HashMapWrapper<AbstractMapJoinKey, MapJoinObjectValue> hashtable = entry.getValue();
-        String filePath = Utilities.generatePath(baseDir, pos, currentFileName);
+        String filePath = Utilities.generatePath(baseDir, conf.getDumpFilePrefix(), pos, currentFileName);
         Path path = new Path(filePath);
         LOG.info("\tLoad back 1 hashtable file from tmp file uri:" + path.toString());
         hashtable.initilizePersistentHash(path.toUri().getPath());

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapredLocalTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapredLocalTask.java?rev=1199117&r1=1199116&r2=1199117&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapredLocalTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapredLocalTask.java Tue Nov  8 05:52:59 2011
@@ -403,7 +403,9 @@ public class MapredLocalTask extends Tas
     if (bigBucketFileName == null || bigBucketFileName.length() == 0) {
       bigBucketFileName = "-";
     }
-    String tmpURIPath = Utilities.generatePath(tmpURI, tag, bigBucketFileName);
+    HashTableSinkOperator htso = (HashTableSinkOperator)childOp;
+    String tmpURIPath = Utilities.generatePath(tmpURI, htso.getConf().getDumpFilePrefix(),
+        tag, bigBucketFileName);
     console.printInfo(Utilities.now() + "\tDump the hashtable into file: " + tmpURIPath);
     Path path = new Path(tmpURIPath);
     FileSystem fs = path.getFileSystem(job);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=1199117&r1=1199116&r2=1199117&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Tue Nov  8 05:52:59 2011
@@ -118,8 +118,8 @@ import org.apache.hadoop.hive.ql.plan.Ma
 import org.apache.hadoop.hive.ql.plan.MapredWork;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
-import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.plan.PlanUtils.ExpressionTypes;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.stats.StatsFactory;
 import org.apache.hadoop.hive.ql.stats.StatsPublisher;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
@@ -134,8 +134,8 @@ import org.apache.hadoop.hive.serde2.typ
 import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.SequenceFile.CompressionType;
+import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.io.compress.DefaultCodec;
 import org.apache.hadoop.mapred.FileOutputFormat;
@@ -1909,9 +1909,10 @@ public final class Utilities {
 
   public static String suffix = ".hashtable";
 
-  public static String generatePath(String baseURI, Byte tag, String bigBucketFileName) {
-    String path = new String(baseURI + Path.SEPARATOR + "MapJoin-" + tag + "-" + bigBucketFileName
-        + suffix);
+  public static String generatePath(String baseURI, String dumpFilePrefix,
+      Byte tag, String bigBucketFileName) {
+    String path = new String(baseURI + Path.SEPARATOR + "MapJoin-" + dumpFilePrefix + tag +
+        "-" + bigBucketFileName + suffix);
     return path;
   }
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java?rev=1199117&r1=1199116&r2=1199117&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java Tue Nov  8 05:52:59 2011
@@ -432,9 +432,18 @@ public class MapJoinProcessor implements
       valueTableDescs.add(valueTableDesc);
       valueFiltedTableDescs.add(valueFilteredTableDesc);
     }
+    String dumpFilePrefix = "";
+    if( joinTree.getMapAliases() != null ) {
+      for(String mapAlias : joinTree.getMapAliases()) {
+        dumpFilePrefix = dumpFilePrefix + mapAlias;
+      }
+      dumpFilePrefix = dumpFilePrefix+"-"+PlanUtils.getCountForMapJoinDumpFilePrefix();
+    } else {
+      dumpFilePrefix = "mapfile"+PlanUtils.getCountForMapJoinDumpFilePrefix();
+    }
     MapJoinDesc mapJoinDescriptor = new MapJoinDesc(keyExprMap, keyTableDesc, valueExprMap,
         valueTableDescs, valueFiltedTableDescs, outputColumnNames, mapJoinPos, joinCondns,
-        filterMap, op.getConf().getNoOuterJoin());
+        filterMap, op.getConf().getNoOuterJoin(), dumpFilePrefix);
     mapJoinDescriptor.setTagOrder(tagOrder);
 
     MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(
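
[Editor's note] The hunk above builds the prefix by concatenating the map-side aliases and appending a global counter, falling back to "mapfile" plus the counter when no aliases are known. A hedged standalone sketch of that construction (class name and the local counter standing in for PlanUtils.getCountForMapJoinDumpFilePrefix() are illustrative):

    import java.util.Arrays;
    import java.util.List;

    public class DumpFilePrefixDemo {
      private static long count = 0; // stand-in for PlanUtils' static counter

      static String buildPrefix(List<String> mapAliases) {
        if (mapAliases != null) {
          StringBuilder prefix = new StringBuilder();
          for (String alias : mapAliases) {
            prefix.append(alias); // e.g. aliases "x", "z" -> "xz"
          }
          return prefix.append('-').append(count++).toString(); // e.g. "xz-0"
        }
        return "mapfile" + count++; // fallback when no map aliases exist
      }

      public static void main(String[] args) {
        System.out.println(buildPrefix(Arrays.asList("x")));      // x-0
        System.out.println(buildPrefix(Arrays.asList("x", "z"))); // xz-1
        System.out.println(buildPrefix(null));                    // mapfile2
      }
    }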

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java?rev=1199117&r1=1199116&r2=1199117&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java Tue Nov  8 05:52:59 2011
@@ -58,7 +58,6 @@ import org.apache.hadoop.hive.ql.plan.Ta
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
 
 /**
  * GenMRSkewJoinProcessor.
@@ -280,10 +279,11 @@ public final class GenMRSkewJoinProcesso
       assert reducer instanceof JoinOperator;
       JoinOperator cloneJoinOp = (JoinOperator) reducer;
 
+      String dumpFilePrefix = "mapfile"+PlanUtils.getCountForMapJoinDumpFilePrefix();
       MapJoinDesc mapJoinDescriptor = new MapJoinDesc(newJoinKeys, keyTblDesc,
           newJoinValues, newJoinValueTblDesc, newJoinValueTblDesc,joinDescriptor
           .getOutputColumnNames(), i, joinDescriptor.getConds(),
-          joinDescriptor.getFilters(), joinDescriptor.getNoOuterJoin());
+          joinDescriptor.getFilters(), joinDescriptor.getNoOuterJoin(), dumpFilePrefix);
       mapJoinDescriptor.setTagOrder(tags);
       mapJoinDescriptor.setHandleSkewJoin(false);
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java?rev=1199117&r1=1199116&r2=1199117&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java Tue Nov  8 05:52:59 2011
@@ -25,8 +25,8 @@ import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.Map.Entry;
+import java.util.Set;
 
 /**
  * Map Join operator Descriptor implementation.
@@ -81,6 +81,9 @@ public class HashTableSinkDesc extends J
   private LinkedHashMap<String, Integer> bucketFileNameMapping;
   private float hashtableMemoryUsage;
 
+  //map join dump file name
+  private String dumpFilePrefix;
+
   public HashTableSinkDesc() {
     bucketFileNameMapping = new LinkedHashMap<String, Integer>();
   }
@@ -109,6 +112,7 @@ public class HashTableSinkDesc extends J
     this.bigTableAlias = clone.getBigTableAlias();
     this.aliasBucketFileNameMapping = clone.getAliasBucketFileNameMapping();
     this.bucketFileNameMapping = clone.getBucketFileNameMapping();
+    this.dumpFilePrefix = clone.getDumpFilePrefix();
   }
 
 
@@ -134,6 +138,21 @@ public class HashTableSinkDesc extends J
     this.hashtableMemoryUsage = hashtableMemoryUsage;
   }
 
+  /**
+   * @return the dumpFilePrefix
+   */
+  public String getDumpFilePrefix() {
+    return dumpFilePrefix;
+  }
+
+  /**
+   * @param dumpFilePrefix
+   *          the dumpFilePrefix to set
+   */
+  public void setDumpFilePrefix(String dumpFilePrefix) {
+    this.dumpFilePrefix = dumpFilePrefix;
+  }
+
   public boolean isHandleSkewJoin() {
     return handleSkewJoin;
   }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java?rev=1199117&r1=1199116&r2=1199117&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java Tue Nov  8 05:52:59 2011
@@ -25,8 +25,8 @@ import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.Map.Entry;
+import java.util.Set;
 
 /**
  * Map Join operator Descriptor implementation.
@@ -50,6 +50,9 @@ public class MapJoinDesc extends JoinDes
  private LinkedHashMap<String, LinkedHashMap<String, ArrayList<String>>> aliasBucketFileNameMapping;
   private LinkedHashMap<String, Integer> bucketFileNameMapping;
 
+  //map join dump file name
+  private String dumpFilePrefix;
+
   public MapJoinDesc() {
     bucketFileNameMapping = new LinkedHashMap<String, Integer>();
   }
@@ -64,13 +67,14 @@ public class MapJoinDesc extends JoinDes
     this.bigTableAlias = clone.bigTableAlias;
     this.aliasBucketFileNameMapping = clone.aliasBucketFileNameMapping;
     this.bucketFileNameMapping = clone.bucketFileNameMapping;
+    this.dumpFilePrefix = clone.dumpFilePrefix;
   }
 
   public MapJoinDesc(final Map<Byte, List<ExprNodeDesc>> keys,
       final TableDesc keyTblDesc, final Map<Byte, List<ExprNodeDesc>> values,
      final List<TableDesc> valueTblDescs,final List<TableDesc> valueFilteredTblDescs, List<String> outputColumnNames,
       final int posBigTable, final JoinCondDesc[] conds,
-      final Map<Byte, List<ExprNodeDesc>> filters, boolean noOuterJoin) {
+      final Map<Byte, List<ExprNodeDesc>> filters, boolean noOuterJoin, String dumpFilePrefix) {
     super(values, outputColumnNames, noOuterJoin, conds, filters);
     this.keys = keys;
     this.keyTblDesc = keyTblDesc;
@@ -78,6 +82,7 @@ public class MapJoinDesc extends JoinDes
     this.valueFilteredTblDescs = valueFilteredTblDescs;
     this.posBigTable = posBigTable;
     this.bucketFileNameMapping = new LinkedHashMap<String, Integer>();
+    this.dumpFilePrefix = dumpFilePrefix;
     initRetainExprList();
   }
 
@@ -104,6 +109,21 @@ public class MapJoinDesc extends JoinDes
   }
 
   /**
+   * @return the dumpFilePrefix
+   */
+  public String getDumpFilePrefix() {
+    return dumpFilePrefix;
+  }
+
+  /**
+   * @param dumpFilePrefix
+   *          the dumpFilePrefix to set
+   */
+  public void setDumpFilePrefix(String dumpFilePrefix) {
+    this.dumpFilePrefix = dumpFilePrefix;
+  }
+
+  /**
    * @return the keys
    */
   @Explain(displayName = "keys")

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java?rev=1199117&r1=1199116&r2=1199117&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java Tue Nov  8 05:52:59 2011
@@ -69,6 +69,8 @@ public final class PlanUtils {
 
   protected static final Log LOG = LogFactory.getLog("org.apache.hadoop.hive.ql.plan.PlanUtils");
 
+  private static long countForMapJoinDumpFilePrefix = 0;
+
   /**
    * ExpressionTypes.
    *
@@ -77,6 +79,10 @@ public final class PlanUtils {
     FIELD, JEXL
   };
 
+  public static long getCountForMapJoinDumpFilePrefix() {
+    return countForMapJoinDumpFilePrefix++;
+  }
+
   @SuppressWarnings("nls")
   public static MapredWork getMapRedWork() {
     try {
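
[Editor's note] The PlanUtils hunk above adds the counter that makes each generated prefix unique: a static field returned with post-increment, so every call hands out the previous value and then advances. A hedged sketch of just that semantics (class name illustrative; as in the hunk, the state is per-JVM and no synchronization is involved):

    public class CounterDemo {
      private static long countForMapJoinDumpFilePrefix = 0;

      public static long getCountForMapJoinDumpFilePrefix() {
        // Post-increment: return the current value, then bump it.
        return countForMapJoinDumpFilePrefix++;
      }

      public static void main(String[] args) {
        System.out.println(getCountForMapJoinDumpFilePrefix()); // 0
        System.out.println(getCountForMapJoinDumpFilePrefix()); // 1
        System.out.println(getCountForMapJoinDumpFilePrefix()); // 2
      }
    }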

Added: hive/trunk/ql/src/test/queries/clientpositive/mapjoin_subquery2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/mapjoin_subquery2.q?rev=1199117&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/mapjoin_subquery2.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/mapjoin_subquery2.q Tue Nov  8 05:52:59 2011
@@ -0,0 +1,39 @@
+drop table x;
+drop table y;
+drop table z;
+
+CREATE TABLE x (name STRING, id INT)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
+
+CREATE TABLE y (id INT, name STRING)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
+
+CREATE TABLE z (id INT, name STRING)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
+
+load data local inpath '../data/files/x.txt' INTO TABLE x;
+load data local inpath '../data/files/y.txt' INTO TABLE y;
+load data local inpath '../data/files/z.txt' INTO TABLE z;
+
+SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name
+FROM
+(SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2 
+ FROM y JOIN x ON (x.id = y.id)) subq
+ JOIN z ON (subq.key1 = z.id);
+
+EXPLAIN
+SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name
+FROM
+(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2
+ FROM y JOIN x ON (x.id = y.id)) subq
+ JOIN z ON (subq.key1 = z.id);
+
+SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name
+FROM
+(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2
+ FROM y JOIN x ON (x.id = y.id)) subq
+ JOIN z ON (subq.key1 = z.id);
+
+drop table x;
+drop table y;
+drop table z;

Added: hive/trunk/ql/src/test/results/clientpositive/mapjoin_subquery2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/mapjoin_subquery2.q.out?rev=1199117&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/mapjoin_subquery2.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/mapjoin_subquery2.q.out Tue Nov  8 05:52:59 2011
@@ -0,0 +1,273 @@
+PREHOOK: query: drop table x
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table x
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table y
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table y
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table z
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table z
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE x (name STRING, id INT)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE x (name STRING, id INT)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@x
+PREHOOK: query: CREATE TABLE y (id INT, name STRING)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE y (id INT, name STRING)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@y
+PREHOOK: query: CREATE TABLE z (id INT, name STRING)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE z (id INT, name STRING)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@z
+PREHOOK: query: load data local inpath '../data/files/x.txt' INTO TABLE x
+PREHOOK: type: LOAD
+PREHOOK: Output: default@x
+POSTHOOK: query: load data local inpath '../data/files/x.txt' INTO TABLE x
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@x
+PREHOOK: query: load data local inpath '../data/files/y.txt' INTO TABLE y
+PREHOOK: type: LOAD
+PREHOOK: Output: default@y
+POSTHOOK: query: load data local inpath '../data/files/y.txt' INTO TABLE y
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@y
+PREHOOK: query: load data local inpath '../data/files/z.txt' INTO TABLE z
+PREHOOK: type: LOAD
+PREHOOK: Output: default@z
+POSTHOOK: query: load data local inpath '../data/files/z.txt' INTO TABLE z
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@z
+PREHOOK: query: SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name
+FROM
+(SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2 
+ FROM y JOIN x ON (x.id = y.id)) subq
+ JOIN z ON (subq.key1 = z.id)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@x
+PREHOOK: Input: default@y
+PREHOOK: Input: default@z
+PREHOOK: Output: file:/tmp/tianzhao/hive_2011-10-24_06-01-49_774_6691053797137524902/-mr-10000
+POSTHOOK: query: SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name
+FROM
+(SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2 
+ FROM y JOIN x ON (x.id = y.id)) subq
+ JOIN z ON (subq.key1 = z.id)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@x
+POSTHOOK: Input: default@y
+POSTHOOK: Input: default@z
+POSTHOOK: Output: file:/tmp/tianzhao/hive_2011-10-24_06-01-49_774_6691053797137524902/-mr-10000
+2	Joe	2	Tie	2	Tie
+2	Hank	2	Tie	2	Tie
+PREHOOK: query: EXPLAIN
+SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name
+FROM
+(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2
+ FROM y JOIN x ON (x.id = y.id)) subq
+ JOIN z ON (subq.key1 = z.id)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name
+FROM
+(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2
+ FROM y JOIN x ON (x.id = y.id)) subq
+ JOIN z ON (subq.key1 = z.id)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME y)) (TOK_TABREF (TOK_TABNAME x)) (= (. (TOK_TABLE_OR_COL x) id) (. (TOK_TABLE_OR_COL y) id)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) id) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) name) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) id) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) name) value2)))) subq) (TOK_TABREF (TOK_TABNAME z)) (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) id)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST z))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) value1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) value2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) id)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) name)))))
+
+STAGE DEPENDENCIES:
+  Stage-4 is a root stage
+  Stage-1 depends on stages: Stage-4
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-4
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        subq:x 
+          Fetch Operator
+            limit: -1
+        z 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        subq:x 
+          TableScan
+            alias: x
+            HashTable Sink Operator
+              condition expressions:
+                0 {id} {name}
+                1 {name} {id}
+              handleSkewJoin: false
+              keys:
+                0 [Column[id]]
+                1 [Column[id]]
+              Position of Big Table: 0
+        z 
+          TableScan
+            alias: z
+            HashTable Sink Operator
+              condition expressions:
+                0 {_col0} {_col1} {_col2} {_col3}
+                1 {id} {name}
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col0]]
+                1 [Column[id]]
+              Position of Big Table: 0
+
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        subq:y 
+          TableScan
+            alias: y
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {id} {name}
+                1 {name} {id}
+              handleSkewJoin: false
+              keys:
+                0 [Column[id]]
+                1 [Column[id]]
+              outputColumnNames: _col0, _col1, _col4, _col5
+              Position of Big Table: 0
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: string
+                      expr: _col4
+                      type: string
+                      expr: _col5
+                      type: int
+                outputColumnNames: _col0, _col1, _col4, _col5
+                Select Operator
+                  expressions:
+                        expr: _col5
+                        type: int
+                        expr: _col4
+                        type: string
+                        expr: _col0
+                        type: int
+                        expr: _col1
+                        type: string
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Map Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    condition expressions:
+                      0 {_col0} {_col1} {_col2} {_col3}
+                      1 {id} {name}
+                    handleSkewJoin: false
+                    keys:
+                      0 [Column[_col0]]
+                      1 [Column[id]]
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    Position of Big Table: 0
+                    Select Operator
+                      expressions:
+                            expr: _col0
+                            type: int
+                            expr: _col1
+                            type: string
+                            expr: _col2
+                            type: int
+                            expr: _col3
+                            type: string
+                            expr: _col4
+                            type: int
+                            expr: _col5
+                            type: string
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                      Select Operator
+                        expressions:
+                              expr: _col0
+                              type: int
+                              expr: _col1
+                              type: string
+                              expr: _col2
+                              type: int
+                              expr: _col3
+                              type: string
+                              expr: _col4
+                              type: int
+                              expr: _col5
+                              type: string
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                        File Output Operator
+                          compressed: false
+                          GlobalTableId: 0
+                          table:
+                              input format: org.apache.hadoop.mapred.TextInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name
+FROM
+(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2
+ FROM y JOIN x ON (x.id = y.id)) subq
+ JOIN z ON (subq.key1 = z.id)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@x
+PREHOOK: Input: default@y
+PREHOOK: Input: default@z
+PREHOOK: Output: file:/tmp/tianzhao/hive_2011-10-24_06-01-55_615_1052117020867998139/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name
+FROM
+(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2
+ FROM y JOIN x ON (x.id = y.id)) subq
+ JOIN z ON (subq.key1 = z.id)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@x
+POSTHOOK: Input: default@y
+POSTHOOK: Input: default@z
+POSTHOOK: Output: file:/tmp/tianzhao/hive_2011-10-24_06-01-55_615_1052117020867998139/-mr-10000
+2	Joe	2	Tie	2	Tie
+2	Hank	2	Tie	2	Tie
+PREHOOK: query: drop table x
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@x
+PREHOOK: Output: default@x
+POSTHOOK: query: drop table x
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@x
+POSTHOOK: Output: default@x
+PREHOOK: query: drop table y
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@y
+PREHOOK: Output: default@y
+POSTHOOK: query: drop table y
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@y
+POSTHOOK: Output: default@y
+PREHOOK: query: drop table z
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@z
+PREHOOK: Output: default@z
+POSTHOOK: query: drop table z
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@z
+POSTHOOK: Output: default@z


