hadoop-hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nzh...@apache.org
Subject svn commit: r988744 - in /hadoop/hive/trunk: ./ data/files/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/ serde/src/java/org/apache/hadoop/hive/serde2/
Date Tue, 24 Aug 2010 22:19:14 GMT
Author: nzhang
Date: Tue Aug 24 22:19:14 2010
New Revision: 988744

URL: http://svn.apache.org/viewvc?rev=988744&view=rev
Log:
 HIVE-741. NULL is not handled correctly in join (Amareshwari Sriramadasu via Ning Zhang)

Added:
    hadoop/hive/trunk/data/files/in1.txt
    hadoop/hive/trunk/data/files/in2.txt
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/join_nulls.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join_nulls.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=988744&r1=988743&r2=988744&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Tue Aug 24 22:19:14 2010
@@ -201,6 +201,9 @@ Trunk -  Unreleased
     HIVE-1589. Add HBase/ZK JARs to Eclipse classpath
     (Carl Steinbach via jvs)
 
+    HIVE-741. NULL is not handled correctly in join
+    (Amareshwari Sriramadasu via Ning Zhang)
+
   TESTS
 
     HIVE-1464. improve  test query performance

Added: hadoop/hive/trunk/data/files/in1.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/data/files/in1.txt?rev=988744&view=auto
==============================================================================
--- hadoop/hive/trunk/data/files/in1.txt (added)
+++ hadoop/hive/trunk/data/files/in1.txt Tue Aug 24 22:19:14 2010
@@ -0,0 +1,3 @@
+35
+48
+100100

Added: hadoop/hive/trunk/data/files/in2.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/data/files/in2.txt?rev=988744&view=auto
==============================================================================
--- hadoop/hive/trunk/data/files/in2.txt (added)
+++ hadoop/hive/trunk/data/files/in2.txt Tue Aug 24 22:19:14 2010
@@ -0,0 +1,3 @@
+135
+148
+200200

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java?rev=988744&r1=988743&r2=988744&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java Tue Aug 24 22:19:14 2010
@@ -130,4 +130,18 @@ public abstract class AbstractMapJoinOpe
   public int getType() {
     return OperatorType.MAPJOIN;
   }
+
+  // returns true if there are elements in key list and all of them are null
+  protected boolean hasAllNulls(ArrayList<Object> key) {
+    if (key != null && key.size() > 0) {
+      for (Object k : key) {
+        if (k != null) {
+          return false;
+        }
+      }
+      return true;
+    }
+    return false;
+  }
+
 }

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java?rev=988744&r1=988743&r2=988744&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java Tue Aug 24 22:19:14 2010
@@ -30,6 +30,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.JoinDesc;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.io.LongWritable;
@@ -86,6 +87,10 @@ public class JoinOperator extends Common
 
       // number of rows for the key in the given table
       int sz = storage.get(alias).size();
+      StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag];
+      StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY
+          .toString());
+      Object keyObject = soi.getStructFieldData(row, sf);
 
       // Are we consuming too much memory
       if (alias == numAliases - 1 && !(handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0)) {
@@ -105,10 +110,6 @@ public class JoinOperator extends Common
           // operand
           // We won't output a warning for the last join operand since the size
           // will never goes to joinEmitInterval.
-          StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag];
-          StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY
-              .toString());
-          Object keyObject = soi.getStructFieldData(row, sf);
           LOG.warn("table " + alias + " has " + sz + " rows for join key "
               + keyObject);
           nextSz = getNextSize(nextSz);
@@ -117,6 +118,11 @@ public class JoinOperator extends Common
 
       // Add the value to the vector
       storage.get(alias).add(nr);
+      // if join-key is null, process each row in different group.
+      if (SerDeUtils.isNullObject(keyObject, sf.getFieldObjectInspector())) {
+        endGroup();
+        startGroup();
+      }
     } catch (Exception e) {
       e.printStackTrace();
       throw new HiveException(e);

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java?rev=988744&r1=988743&r2=988744&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java Tue Aug 24 22:19:14 2010
@@ -282,7 +282,8 @@ public class MapJoinOperator extends Abs
           MapJoinObjectKey keyMap = new MapJoinObjectKey(metadataKeyTag, key);
           MapJoinObjectValue o = mapJoinTables.get(pos).get(keyMap);
 
-          if (o == null) {
+          // there is no join-value or join-key has all null elements
+          if (o == null || (hasAllNulls(key))) {
             if (noOuterJoin) {
               storage.put(pos, emptyList);
             } else {

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java?rev=988744&r1=988743&r2=988744&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java Tue Aug 24 22:19:14 2010
@@ -68,6 +68,7 @@ public class SMBMapJoinOperator extends 
   private transient final HashMap<Byte, Boolean> foundNextKeyGroup = new HashMap<Byte, Boolean>();
   transient boolean firstFetchHappened = false;
   transient boolean localWorkInited = false;
+  private Map<ArrayList<Object>, Boolean> keyToHasNullsMap;
 
   public SMBMapJoinOperator() {
   }
@@ -109,6 +110,7 @@ public class SMBMapJoinOperator extends 
       }
       foundNextKeyGroup.put(alias, Boolean.FALSE);
     }
+    keyToHasNullsMap = new HashMap<ArrayList<Object>, Boolean>();
   }
 
   @Override
@@ -200,6 +202,7 @@ public class SMBMapJoinOperator extends 
         joinKeysObjectInspectors.get(alias));
     ArrayList<Object> value = computeValues(row, joinValues.get(alias),
         joinValuesObjectInspectors.get(alias));
+    keyToHasNullsMap.put(key, hasAllNulls(key));
 
     //have we reached a new key group?
     boolean nextKeyGroup = processKey(alias, key);
@@ -383,6 +386,13 @@ public class SMBMapJoinOperator extends 
 
   private int compareKeys (ArrayList<Object> k1, ArrayList<Object> k2) {
     int ret = 0;
+    if (keyToHasNullsMap.get(k1) && keyToHasNullsMap.get(k2)) {
+      return -1; // just return k1 is smaller than k2
+    } else if (keyToHasNullsMap.get(k1)) {
+      return (0 - k2.size());
+    } else if (keyToHasNullsMap.get(k2)) {
+      return k1.size();
+    }
     for (int i = 0; i < k1.size() && i < k1.size(); i++) {
       WritableComparable key_1 = (WritableComparable) k1.get(i);
       WritableComparable key_2 = (WritableComparable) k2.get(i);

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/join_nulls.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/join_nulls.q?rev=988744&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/join_nulls.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/join_nulls.q Tue Aug 24 22:19:14 2010
@@ -0,0 +1,70 @@
+DROP TABLE myinput1;
+DROP TABLE smb_input1;
+DROP TABLE smb_input2;
+
+CREATE TABLE myinput1(key int, value int);
+LOAD DATA LOCAL INPATH '../data/files/in1.txt' INTO TABLE myinput1;
+
+SELECT * FROM myinput1 a JOIN myinput1 b;
+SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b;
+SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b;
+SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.value;
+SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.key;
+SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value;
+SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value;
+SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value;
+SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key;
+SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value;
+SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key;
+SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value;
+SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value;
+SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key;
+SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value;
+
+SELECT * from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value);
+SELECT * from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value);
+SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value;
+
+SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b;
+SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value;
+SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key;
+SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value;
+SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value;
+SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key;
+SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value;
+SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value;
+SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key;
+SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value;
+SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value;
+SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key;
+SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value;
+
+CREATE TABLE smb_input1(key int, value int) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;

+CREATE TABLE smb_input2(key int, value int) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS; 
+LOAD DATA LOCAL INPATH '../data/files/in1.txt' into table smb_input1;
+LOAD DATA LOCAL INPATH '../data/files/in2.txt' into table smb_input1;
+LOAD DATA LOCAL INPATH '../data/files/in1.txt' into table smb_input2;
+LOAD DATA LOCAL INPATH '../data/files/in2.txt' into table smb_input2;
+
+SET hive.optimize.bucketmapJOIN = true;
+SET hive.optimize.bucketmapJOIN.sortedmerge = true;
+SET hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
+
+SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key ORDER BY a.key;
+SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input1 b ON a.key = b.key ORDER BY b.key, b.value;
+SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key ORDER BY a.key;
+SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input1 b ON a.key = b.key ORDER BY a.key, a.value;
+
+SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key = b.value ORDER BY a.key;
+SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key = b.value ORDER BY a.key;
+SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input2 b ON a.key = b.value ORDER BY a.key, a.value;
+SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input2 b ON a.key = b.value ORDER BY b.key, b.value;
+
+SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value = b.value ORDER BY a.value;
+SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a RIGHT OUTER JOIN smb_input2 b ON a.value = b.value ORDER BY b.key, b.value;
+SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value = b.value ORDER BY a.value;
+SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a LEFT OUTER JOIN smb_input2 b ON a.value = b.value ORDER BY a.key, a.value;
+
+DROP TABLE smb_input2;
+DROP TABLE smb_input1;
+DROP TABLE myinput1;

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/join_nulls.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/join_nulls.q.out?rev=988744&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/join_nulls.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/join_nulls.q.out Tue Aug 24 22:19:14 2010
@@ -0,0 +1,593 @@
+PREHOOK: query: DROP TABLE myinput1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE myinput1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE smb_input1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE smb_input1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE smb_input2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE smb_input2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE myinput1(key int, value int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE myinput1(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@myinput1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in1.txt' INTO TABLE myinput1
+PREHOOK: type: LOAD
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in1.txt' INTO TABLE myinput1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@myinput1
+PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-17_243_2644277693400732372/-mr-10000
+POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-17_243_2644277693400732372/-mr-10000
+NULL	35	NULL	35
+NULL	35	48	NULL
+NULL	35	100	100
+48	NULL	NULL	35
+48	NULL	48	NULL
+48	NULL	100	100
+100	100	NULL	35
+100	100	48	NULL
+100	100	100	100
+PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-19_784_5319048142201987096/-mr-10000
+POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-19_784_5319048142201987096/-mr-10000
+NULL	35	NULL	35
+NULL	35	48	NULL
+NULL	35	100	100
+48	NULL	NULL	35
+48	NULL	48	NULL
+48	NULL	100	100
+100	100	NULL	35
+100	100	48	NULL
+100	100	100	100
+PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-22_077_1127144305944050770/-mr-10000
+POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-22_077_1127144305944050770/-mr-10000
+NULL	35	NULL	35
+NULL	35	48	NULL
+NULL	35	100	100
+48	NULL	NULL	35
+48	NULL	48	NULL
+48	NULL	100	100
+100	100	NULL	35
+100	100	48	NULL
+100	100	100	100
+PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-24_424_3529043878133127087/-mr-10000
+POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-24_424_3529043878133127087/-mr-10000
+100	100	100	100
+PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-26_755_1023724496806721215/-mr-10000
+POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-26_755_1023724496806721215/-mr-10000
+48	NULL	48	NULL
+100	100	100	100
+PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-29_041_5563395763782073560/-mr-10000
+POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-29_041_5563395763782073560/-mr-10000
+NULL	35	NULL	35
+100	100	100	100
+PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-31_315_7618449693784281411/-mr-10000
+POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-31_315_7618449693784281411/-mr-10000
+NULL	35	NULL	NULL
+48	NULL	NULL	NULL
+100	100	100	100
+PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-33_682_9012862787524525556/-mr-10000
+POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-33_682_9012862787524525556/-mr-10000
+48	NULL	NULL	NULL
+NULL	35	NULL	35
+100	100	100	100
+PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-35_913_1368787467713976651/-mr-10000
+POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-35_913_1368787467713976651/-mr-10000
+NULL	35	NULL	NULL
+48	NULL	48	NULL
+100	100	100	100
+PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-38_139_507910550329126296/-mr-10000
+POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-38_139_507910550329126296/-mr-10000
+NULL	NULL	48	NULL
+NULL	NULL	NULL	35
+100	100	100	100
+PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-40_380_5752206579111501535/-mr-10000
+POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-40_380_5752206579111501535/-mr-10000
+NULL	NULL	NULL	35
+48	NULL	48	NULL
+100	100	100	100
+PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-42_711_7203915333997653053/-mr-10000
+POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-42_711_7203915333997653053/-mr-10000
+NULL	NULL	48	NULL
+NULL	35	NULL	35
+100	100	100	100
+PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-45_005_2097190139910462757/-mr-10000
+POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-45_005_2097190139910462757/-mr-10000
+NULL	35	NULL	NULL
+NULL	NULL	48	NULL
+NULL	NULL	NULL	35
+48	NULL	NULL	NULL
+100	100	100	100
+PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-47_257_7323115616296556466/-mr-10000
+POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-47_257_7323115616296556466/-mr-10000
+NULL	35	NULL	NULL
+NULL	NULL	NULL	35
+48	NULL	48	NULL
+100	100	100	100
+PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-49_511_2835828582770697326/-mr-10000
+POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-49_511_2835828582770697326/-mr-10000
+48	NULL	NULL	NULL
+NULL	NULL	48	NULL
+NULL	35	NULL	35
+100	100	100	100
+PREHOOK: query: SELECT * from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-51_722_5486518368735387317/-mr-10000
+POSTHOOK: query: SELECT * from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-51_722_5486518368735387317/-mr-10000
+NULL	NULL	NULL	NULL	48	NULL
+NULL	35	NULL	35	NULL	35
+100	100	100	100	100	100
+PREHOOK: query: SELECT * from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-54_007_6526115910610177518/-mr-10000
+POSTHOOK: query: SELECT * from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-54_007_6526115910610177518/-mr-10000
+NULL	NULL	48	NULL	NULL	NULL
+NULL	35	NULL	35	NULL	35
+100	100	100	100	100	100
+PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-56_301_1180499696568055707/-mr-10000
+POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-02-56_301_1180499696568055707/-mr-10000
+NULL	NULL	NULL	NULL	48	NULL
+NULL	35	NULL	35	NULL	35
+100	100	100	100	100	100
+PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-00_784_8177108597645959542/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-00_784_8177108597645959542/-mr-10000
+NULL	35	NULL	35
+48	NULL	NULL	35
+100	100	NULL	35
+NULL	35	48	NULL
+48	NULL	48	NULL
+100	100	48	NULL
+NULL	35	100	100
+48	NULL	100	100
+100	100	100	100
+PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-02_995_5954257082995484423/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-02_995_5954257082995484423/-mr-10000
+100	100	100	100
+PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-05_179_893551064505374226/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-05_179_893551064505374226/-mr-10000
+48	NULL	48	NULL
+100	100	100	100
+PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-07_429_2356564524660274058/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-07_429_2356564524660274058/-mr-10000
+NULL	35	NULL	35
+100	100	100	100
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-09_626_7027313822877080506/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-09_626_7027313822877080506/-mr-10000
+100	100	100	100
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-11_809_1536686550822126228/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-11_809_1536686550822126228/-mr-10000
+48	NULL	48	NULL
+100	100	100	100
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-14_070_2576009303176359392/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-14_070_2576009303176359392/-mr-10000
+NULL	35	NULL	35
+100	100	100	100
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-16_258_3601408426081449535/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-16_258_3601408426081449535/-mr-10000
+NULL	35	NULL	NULL
+48	NULL	NULL	NULL
+100	100	100	100
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-18_463_6253242788418262785/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-18_463_6253242788418262785/-mr-10000
+NULL	35	NULL	NULL
+48	NULL	48	NULL
+100	100	100	100
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-20_654_6218349195280704470/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-20_654_6218349195280704470/-mr-10000
+NULL	35	NULL	35
+48	NULL	NULL	NULL
+100	100	100	100
+PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-22_924_6570219390510773741/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-22_924_6570219390510773741/-mr-10000
+NULL	NULL	NULL	35
+NULL	NULL	48	NULL
+100	100	100	100
+PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON
a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-25_161_1733690523870724803/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON
a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-25_161_1733690523870724803/-mr-10000
+NULL	NULL	NULL	35
+48	NULL	48	NULL
+100	100	100	100
+PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON
a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-27_372_7774596545760682849/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON
a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-27_372_7774596545760682849/-mr-10000
+NULL	35	NULL	35
+NULL	NULL	48	NULL
+100	100	100	100
+PREHOOK: query: CREATE TABLE smb_input1(key int, value int) CLUSTERED BY (key) SORTED BY
(key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE smb_input1(key int, value int) CLUSTERED BY (key) SORTED BY
(key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@smb_input1
+PREHOOK: query: CREATE TABLE smb_input2(key int, value int) CLUSTERED BY (value) SORTED BY
(value) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE smb_input2(key int, value int) CLUSTERED BY (value) SORTED
BY (value) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@smb_input2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in1.txt' into table smb_input1
+PREHOOK: type: LOAD
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in1.txt' into table smb_input1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@smb_input1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in2.txt' into table smb_input1
+PREHOOK: type: LOAD
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in2.txt' into table smb_input1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@smb_input1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in1.txt' into table smb_input2
+PREHOOK: type: LOAD
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in1.txt' into table smb_input2
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@smb_input2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in2.txt' into table smb_input2
+PREHOOK: type: LOAD
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in2.txt' into table smb_input2
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@smb_input2
+PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key =
b.key ORDER BY a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smb_input1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-30_243_4546985922226107164/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key
= b.key ORDER BY a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smb_input1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-30_243_4546985922226107164/-mr-10000
+48	NULL	48	NULL
+100	100	100	100
+148	NULL	148	NULL
+200	200	200	200
+PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input1
b ON a.key = b.key ORDER BY b.key, b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smb_input1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-34_575_8944309786058146271/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input1
b ON a.key = b.key ORDER BY b.key, b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smb_input1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-34_575_8944309786058146271/-mr-10000
+NULL	NULL	NULL	35
+NULL	NULL	NULL	135
+48	NULL	48	NULL
+100	100	100	100
+148	NULL	148	NULL
+200	200	200	200
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key =
b.key ORDER BY a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smb_input1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-38_873_5820673626777323488/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key
= b.key ORDER BY a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smb_input1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-38_873_5820673626777323488/-mr-10000
+48	NULL	48	NULL
+100	100	100	100
+148	NULL	148	NULL
+200	200	200	200
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input1 b
ON a.key = b.key ORDER BY a.key, a.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smb_input1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-43_139_2148561557873278612/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input1
b ON a.key = b.key ORDER BY a.key, a.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smb_input1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-43_139_2148561557873278612/-mr-10000
+NULL	35	NULL	NULL
+NULL	135	NULL	NULL
+48	NULL	48	NULL
+100	100	100	100
+148	NULL	148	NULL
+200	200	200	200
+PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key =
b.value ORDER BY a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smb_input2
+PREHOOK: Input: default@smb_input1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-47_455_7725011320735873205/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key
= b.value ORDER BY a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smb_input2
+POSTHOOK: Input: default@smb_input1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-47_455_7725011320735873205/-mr-10000
+100	100	100	100
+200	200	200	200
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key =
b.value ORDER BY a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smb_input2
+PREHOOK: Input: default@smb_input1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-51_797_1660829853063165698/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key
= b.value ORDER BY a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smb_input2
+POSTHOOK: Input: default@smb_input1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-51_797_1660829853063165698/-mr-10000
+100	100	100	100
+200	200	200	200
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input2 b
ON a.key = b.value ORDER BY a.key, a.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smb_input2
+PREHOOK: Input: default@smb_input1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-56_206_1650617190450676746/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input2
b ON a.key = b.value ORDER BY a.key, a.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smb_input2
+POSTHOOK: Input: default@smb_input1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-03-56_206_1650617190450676746/-mr-10000
+NULL	35	NULL	NULL
+NULL	135	NULL	NULL
+48	NULL	NULL	NULL
+100	100	100	100
+148	NULL	NULL	NULL
+200	200	200	200
+PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input2
b ON a.key = b.value ORDER BY b.key, b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smb_input2
+PREHOOK: Input: default@smb_input1
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-04-00_563_760559040118626904/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input2
b ON a.key = b.value ORDER BY b.key, b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smb_input2
+POSTHOOK: Input: default@smb_input1
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-04-00_563_760559040118626904/-mr-10000
+NULL	NULL	NULL	35
+NULL	NULL	NULL	135
+NULL	NULL	48	NULL
+100	100	100	100
+NULL	NULL	148	NULL
+200	200	200	200
+PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value
= b.value ORDER BY a.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smb_input2
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-04-04_862_4725133242101166020/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value
= b.value ORDER BY a.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smb_input2
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-04-04_862_4725133242101166020/-mr-10000
+NULL	35	NULL	35
+100	100	100	100
+NULL	135	NULL	135
+200	200	200	200
+PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a RIGHT OUTER JOIN smb_input2
b ON a.value = b.value ORDER BY b.key, b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smb_input2
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-04-09_224_8780642004670528896/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a RIGHT OUTER JOIN smb_input2
b ON a.value = b.value ORDER BY b.key, b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smb_input2
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-04-09_224_8780642004670528896/-mr-10000
+NULL	35	NULL	35
+NULL	135	NULL	135
+NULL	NULL	48	NULL
+100	100	100	100
+NULL	NULL	148	NULL
+200	200	200	200
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value
= b.value ORDER BY a.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smb_input2
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-04-13_547_7917315032596933463/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value
= b.value ORDER BY a.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smb_input2
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-04-13_547_7917315032596933463/-mr-10000
+NULL	35	NULL	35
+100	100	100	100
+NULL	135	NULL	135
+200	200	200	200
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a LEFT OUTER JOIN smb_input2 b
ON a.value = b.value ORDER BY a.key, a.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smb_input2
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-04-17_843_48387701327028385/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a LEFT OUTER JOIN smb_input2
b ON a.value = b.value ORDER BY a.key, a.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smb_input2
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-08-23_23-04-17_843_48387701327028385/-mr-10000
+NULL	35	NULL	35
+NULL	135	NULL	135
+48	NULL	NULL	NULL
+100	100	100	100
+148	NULL	NULL	NULL
+200	200	200	200
+PREHOOK: query: DROP TABLE smb_input2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@smb_input2
+PREHOOK: Output: default@smb_input2
+POSTHOOK: query: DROP TABLE smb_input2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@smb_input2
+POSTHOOK: Output: default@smb_input2
+PREHOOK: query: DROP TABLE smb_input1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@smb_input1
+PREHOOK: Output: default@smb_input1
+POSTHOOK: query: DROP TABLE smb_input1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@smb_input1
+POSTHOOK: Output: default@smb_input1
+PREHOOK: query: DROP TABLE myinput1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@myinput1
+PREHOOK: Output: default@myinput1
+POSTHOOK: query: DROP TABLE myinput1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@myinput1
+POSTHOOK: Output: default@myinput1

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java?rev=988744&r1=988743&r2=988744&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java Tue Aug 24 22:19:14 2010
@@ -331,6 +331,92 @@ public final class SerDeUtils {
     }
   }
 
+  /**
+   * Checks whether an object represents NULL: either the value itself is null,
+   * or it is a non-empty list, map or struct whose members all represent NULL.
+   *
+   * @param o The object to test
+   * @param oi The ObjectInspector used to interpret o
+   * @return true if the object represents NULL, false otherwise
+   * @throws RuntimeException if the category of oi is not one handled here
+   */
+  public static boolean isNullObject(Object o, ObjectInspector oi) {
+    switch (oi.getCategory()) {
+    case PRIMITIVE: {
+      if (o == null) {
+        return true;
+      }
+      return false;
+    }
+    case LIST: {
+      ListObjectInspector loi = (ListObjectInspector) oi;
+      ObjectInspector listElementObjectInspector = loi
+          .getListElementObjectInspector();
+      List<?> olist = loi.getList(o);
+      if (olist == null) {
+        return true;
+      } else {
+        // an empty (but non-null) list is not treated as NULL
+        if (olist.size() == 0) {
+          return false;
+        }
+        // NULL only if every element, checked recursively, represents NULL
+        for (int i = 0; i < olist.size(); i++) {
+          if (!isNullObject(olist.get(i), listElementObjectInspector)) {
+            return false;
+          }
+        }
+        return true;
+      }
+    }
+    case MAP: {
+      MapObjectInspector moi = (MapObjectInspector) oi;
+      ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector();
+      ObjectInspector mapValueObjectInspector = moi
+          .getMapValueObjectInspector();
+      Map<?, ?> omap = moi.getMap(o);
+      if (omap == null) {
+        return true;
+      } else {
+        // an empty (but non-null) map is not treated as NULL
+        if (omap.entrySet().size() == 0) {
+          return false;
+        }
+        // NULL only if both the key and the value of every entry represent NULL
+        for (Map.Entry<?, ?> entry : omap.entrySet()) {
+          if (!isNullObject(entry.getKey(), mapKeyObjectInspector)
+              || !isNullObject(entry.getValue(), mapValueObjectInspector)) {
+            return false;
+          }
+        }
+        return true;
+      }
+    }
+    case STRUCT: {
+      StructObjectInspector soi = (StructObjectInspector) oi;
+      List<? extends StructField> structFields = soi.getAllStructFieldRefs();
+      if (o == null) {
+        return true;
+      } else {
+        // a non-null struct whose type declares no fields is not treated as NULL
+        if (structFields.size() == 0) {
+          return false;
+        }
+        // NULL only if every field, checked recursively, represents NULL
+        for (int i = 0; i < structFields.size(); i++) {
+          if (!isNullObject(soi.getStructFieldData(o, structFields.get(i)),
+              structFields.get(i).getFieldObjectInspector())) {
+            return false;
+          }
+        }
+        return true;
+      }
+    }
+    default:
+      throw new RuntimeException("Unknown type in ObjectInspector!");
+    }
+  }
+
   private SerDeUtils() {
     // prevent instantiation
   }



Mime
View raw message