hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From na...@apache.org
Subject svn commit: r1460988 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/ java/org/apache/hadoop/hive/ql/exec/ java/org/apache/hadoop/hive/ql/optimizer/ java/org/apache/hadoop/hive/ql/parse/ test/results/clientpositive/
Date Tue, 26 Mar 2013 06:44:35 GMT
Author: namit
Date: Tue Mar 26 06:44:35 2013
New Revision: 1460988

URL: http://svn.apache.org/r1460988
Log:
HIVE-4212 sort merge join should work for outer joins for more than 8 inputs
(Namit via Gang Tim Liu)


Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_17.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java?rev=1460988&r1=1460987&r2=1460988&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java Tue Mar 26 06:44:35 2013
@@ -236,8 +236,8 @@ public enum ErrorMsg {
     "Fix the metadata or don't use bucketed mapjoin, by setting " +
     "hive.enforce.bucketmapjoin to false."),
 
-  JOINNODE_OUTERJOIN_MORETHAN_8(10142, "Single join node containing outer join(s) " +
-      "cannot have more than 8 aliases"),
+  JOINNODE_OUTERJOIN_MORETHAN_32(10142, "Single join node containing outer join(s) " +
+      "cannot have more than 32 aliases"),
 
   INVALID_JDO_FILTER_EXPRESSION(10043, "Invalid expression for JDO filter"),
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java?rev=1460988&r1=1460987&r2=1460988&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java Tue Mar 26 06:44:35 2013
@@ -36,7 +36,7 @@ import org.apache.hadoop.hive.ql.metadat
 import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
 import org.apache.hadoop.hive.ql.plan.JoinDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
-import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
 import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -273,18 +273,15 @@ public abstract class CommonJoinOperator
       for (Byte alias : order) {
         ArrayList<ObjectInspector> rcOIs = new ArrayList<ObjectInspector>();
         rcOIs.addAll(joinValuesObjectInspectors[alias]);
-        // for each alias, add object inspector for boolean as the last element
+        // for each alias, add object inspector for short as the last element
         rcOIs.add(
-            PrimitiveObjectInspectorFactory.writableByteObjectInspector);
+            PrimitiveObjectInspectorFactory.writableShortObjectInspector);
         rowContainerObjectInspectors[alias] = rcOIs;
       }
       rowContainerStandardObjectInspectors =
         JoinUtil.getStandardObjectInspectors(rowContainerObjectInspectors,NOTSKIPBIGTABLE, tagLen);
     }
 
-
-
-
     dummyObj = new Object[numAliases];
     dummyObjVectors = new RowContainer[numAliases];
 
@@ -309,7 +306,7 @@ public abstract class CommonJoinOperator
         // add whether the row is filtered or not
         // this value does not matter for the dummyObj
         // because the join values are already null
-        nr.add(new ByteWritable());
+        nr.add(new ShortWritable());
       }
       dummyObj[pos] = nr;
       // there should be only 1 dummy object in the RowContainer
@@ -324,7 +321,7 @@ public abstract class CommonJoinOperator
       // e.g., the output columns does not contains the input table
       RowContainer rc = JoinUtil.getRowContainer(hconf,
           rowContainerStandardObjectInspectors[pos],
-          alias, joinCacheSize,spillTableDesc, conf, !hasFilter(pos), reporter);
+          alias, joinCacheSize, spillTableDesc, conf, !hasFilter(pos), reporter);
       storage[pos] = rc;
 
       pos++;
@@ -861,7 +858,7 @@ transient boolean newGroupStarted = fals
   // returns filter result of left object by filters associated with right alias
   private boolean isLeftFiltered(int left, int right, List<Object> leftObj) {
     if (joinValues[order[left]].size() < leftObj.size()) {
-      ByteWritable filter = (ByteWritable) leftObj.get(leftObj.size() - 1);
+      ShortWritable filter = (ShortWritable) leftObj.get(leftObj.size() - 1);
       return JoinUtil.isFiltered(filter.get(), right);
     }
     return false;
@@ -870,7 +867,7 @@ transient boolean newGroupStarted = fals
   // returns filter result of right object by filters associated with left alias
   private boolean isRightFiltered(int left, int right, List<Object> rightObj) {
     if (joinValues[order[right]].size() < rightObj.size()) {
-      ByteWritable filter = (ByteWritable) rightObj.get(rightObj.size() - 1);
+      ShortWritable filter = (ShortWritable) rightObj.get(rightObj.size() - 1);
       return JoinUtil.isFiltered(filter.get(), left);
     }
     return false;
@@ -879,7 +876,8 @@ transient boolean newGroupStarted = fals
   // returns object has any filtered tag
   private boolean hasAnyFiltered(int alias, List<Object> row) {
     return row == dummyObj[alias] ||
-        hasFilter(alias) && JoinUtil.hasAnyFiltered(((ByteWritable) row.get(row.size() - 1)).get());
+        hasFilter(alias) &&
+        JoinUtil.hasAnyFiltered(((ShortWritable) row.get(row.size() - 1)).get());
   }
 
   protected final boolean hasFilter(int alias) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java?rev=1460988&r1=1460987&r2=1460988&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java Tue Mar 26 06:44:35 2013
@@ -231,7 +231,7 @@ public class HashTableSinkOperator exten
         if (filterMap != null && filterMap[alias] != null) {
           // for each alias, add object inspector for filter tag as the last element
           rcOIs = new ArrayList<ObjectInspector>(rcOIs);
-          rcOIs.add(PrimitiveObjectInspectorFactory.writableByteObjectInspector);
+          rcOIs.add(PrimitiveObjectInspectorFactory.writableShortObjectInspector);
         }
         rowContainerObjectInspectors[alias] = rcOIs;
       }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java?rev=1460988&r1=1460987&r2=1460988&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java Tue Mar 26 06:44:35 2013
@@ -35,15 +35,15 @@ import org.apache.hadoop.hive.ql.plan.Jo
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
 import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.SequenceFileInputFormat;
 import org.apache.hadoop.util.ReflectionUtils;
@@ -203,7 +203,7 @@ public class JoinUtil {
     if (filterMap != null) {
       nr = new Object[valueFields.size()+1];
       // add whether the row is filtered or not.
-      nr[valueFields.size()] = new ByteWritable(isFiltered(row, filters, filtersOI, filterMap));
+      nr[valueFields.size()] = new ShortWritable(isFiltered(row, filters, filtersOI, filterMap));
     }else{
       nr = new Object[valueFields.size()];
     }
@@ -235,22 +235,29 @@ public class JoinUtil {
     }
     if (filterMap != null) {
       // add whether the row is filtered or not.
-      nr.add(new ByteWritable(isFiltered(row, filters, filtersOI, filterMap)));
+      nr.add(new ShortWritable(isFiltered(row, filters, filtersOI, filterMap)));
     }
 
     return nr;
   }
 
-  private static final byte[] MASKS = new byte[]
-      {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, (byte) 0x80};
+  private static final short[] MASKS;
+  static {
+    int num = 32;
+    MASKS = new short[num];
+    MASKS[0] = 1;
+    for (int idx = 1; idx < num; idx++) {
+      MASKS[idx] = (short)(2 * MASKS[idx-1]);
+    }
+  }
 
   /**
    * Returns true if the row does not pass through filters.
    */
-  protected static byte isFiltered(Object row, List<ExprNodeEvaluator> filters,
+  protected static short isFiltered(Object row, List<ExprNodeEvaluator> filters,
       List<ObjectInspector> ois, int[] filterMap) throws HiveException {
     // apply join filters on the row.
-    byte ret = 0;
+    short ret = 0;
     int j = 0;
     for (int i = 0; i < filterMap.length; i += 2) {
       int tag = filterMap[i];
@@ -274,11 +281,11 @@ public class JoinUtil {
     return ret;
   }
 
-  protected static boolean isFiltered(byte filter, int tag) {
+  protected static boolean isFiltered(short filter, int tag) {
     return (filter & MASKS[tag]) != 0;
   }
 
-  protected static boolean hasAnyFiltered(byte tag) {
+  protected static boolean hasAnyFiltered(short tag) {
     return tag != 0;
   }
 
@@ -330,7 +337,7 @@ public class JoinUtil {
       if (!noFilter) {
         colNames.append("filtered");
         colNames.append(',');
-        colTypes.append(TypeInfoFactory.byteTypeInfo.getTypeName());
+        colTypes.append(TypeInfoFactory.shortTypeInfo.getTypeName());
         colTypes.append(',');
       }
       // remove the last ','

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java?rev=1460988&r1=1460987&r2=1460988&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java Tue Mar 26 06:44:35 2013
@@ -439,7 +439,7 @@ public class MapJoinProcessor implements
       }
       if (filterMap != null && filterMap[pos] != null && pos != mapJoinPos) {
         ExprNodeColumnDesc isFilterDesc = new ExprNodeColumnDesc(TypeInfoFactory
-            .getPrimitiveTypeInfo(serdeConstants.TINYINT_TYPE_NAME), "filter", "filter", false);
+            .getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME), "filter", "filter", false);
         valueFilteredCols.add(isFilterDesc);
       }
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1460988&r1=1460987&r2=1460988&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Tue Mar 26 06:44:35 2013
@@ -6580,8 +6580,8 @@ public class SemanticAnalyzer extends Ba
     }
     if (!node.getNoOuterJoin() || !target.getNoOuterJoin()) {
       // todo 8 way could be not enough number
-      if (node.getLeftAliases().length + node.getRightAliases().length + 1 >= 8) {
-        LOG.info(ErrorMsg.JOINNODE_OUTERJOIN_MORETHAN_8);
+      if (node.getLeftAliases().length + node.getRightAliases().length + 1 >= 32) {
+        LOG.info(ErrorMsg.JOINNODE_OUTERJOIN_MORETHAN_32);
         return false;
       }
     }

Modified: hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_17.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_17.q.out?rev=1460988&r1=1460987&r2=1460988&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_17.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_17.q.out Tue Mar 26 06:44:35 2013
@@ -397,7 +397,6 @@ ABSTRACT SYNTAX TREE:
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
   Stage-0 is a root stage
 
 STAGE PLANS:
@@ -414,13 +413,15 @@ STAGE PLANS:
                    Left Outer Join0 to 3
                    Left Outer Join0 to 4
                    Left Outer Join0 to 5
+                   Left Outer Join0 to 6
               condition expressions:
-                0 {key}
+                0 
                 1 
                 2 
                 3 
                 4 
                 5 
+                6 
               handleSkewJoin: false
               keys:
                 0 [Column[key]]
@@ -429,61 +430,21 @@ STAGE PLANS:
                 3 [Column[key]]
                 4 [Column[key]]
                 5 [Column[key]]
-              outputColumnNames: _col0
+                6 [Column[key]]
               Position of Big Table: 0
-              Reduce Output Operator
-                key expressions:
-                      expr: _col0
-                      type: int
-                sort order: +
-                Map-reduce partition columns:
-                      expr: _col0
-                      type: int
-                tag: 0
-        g 
-          TableScan
-            alias: g
-            Reduce Output Operator
-              key expressions:
-                    expr: key
-                    type: int
-              sort order: +
-              Map-reduce partition columns:
-                    expr: key
-                    type: int
-              tag: 1
-      Reduce Operator Tree:
-        Join Operator
-          condition map:
-               Left Outer Join0 to 1
-          condition expressions:
-            0 
-            1 
-          handleSkewJoin: false
-          Select Operator
-            Group By Operator
-              aggregations:
-                    expr: count()
-              bucketGroup: false
-              mode: hash
-              outputColumnNames: _col0
-              File Output Operator
-                compressed: false
-                GlobalTableId: 0
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
-  Stage: Stage-2
-    Map Reduce
-      Alias -> Map Operator Tree:
-#### A masked pattern was here ####
-            Reduce Output Operator
-              sort order: 
-              tag: -1
-              value expressions:
-                    expr: _col0
-                    type: bigint
+              Select Operator
+                Group By Operator
+                  aggregations:
+                        expr: count()
+                  bucketGroup: false
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    tag: -1
+                    value expressions:
+                          expr: _col0
+                          type: bigint
       Reduce Operator Tree:
         Group By Operator
           aggregations:
@@ -598,9 +559,6 @@ ABSTRACT SYNTAX TREE:
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-12 depends on stages: Stage-1
-  Stage-11 depends on stages: Stage-12
-  Stage-3 depends on stages: Stage-11
   Stage-0 is a root stage
 
 STAGE PLANS:
@@ -617,13 +575,17 @@ STAGE PLANS:
                    Left Outer Join0 to 3
                    Left Outer Join0 to 4
                    Left Outer Join0 to 5
+                   Left Outer Join0 to 6
+                   Left Outer Join0 to 7
               condition expressions:
-                0 {key}
+                0 
                 1 
                 2 
                 3 
                 4 
                 5 
+                6 
+                7 
               handleSkewJoin: false
               keys:
                 0 [Column[key]]
@@ -632,82 +594,8 @@ STAGE PLANS:
                 3 [Column[key]]
                 4 [Column[key]]
                 5 [Column[key]]
-              outputColumnNames: _col0
-              Position of Big Table: 0
-              Reduce Output Operator
-                key expressions:
-                      expr: _col0
-                      type: int
-                sort order: +
-                Map-reduce partition columns:
-                      expr: _col0
-                      type: int
-                tag: 0
-                value expressions:
-                      expr: _col0
-                      type: int
-        g 
-          TableScan
-            alias: g
-            Reduce Output Operator
-              key expressions:
-                    expr: key
-                    type: int
-              sort order: +
-              Map-reduce partition columns:
-                    expr: key
-                    type: int
-              tag: 1
-      Reduce Operator Tree:
-        Join Operator
-          condition map:
-               Left Outer Join0 to 1
-          condition expressions:
-            0 {VALUE._col20}
-            1 
-          handleSkewJoin: false
-          outputColumnNames: _col20
-          File Output Operator
-            compressed: false
-            GlobalTableId: 0
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
-  Stage: Stage-12
-    Map Reduce Local Work
-      Alias -> Map Local Tables:
-        h 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        h 
-          TableScan
-            alias: h
-            HashTable Sink Operator
-              condition expressions:
-                0 
-                1 
-              handleSkewJoin: false
-              keys:
-                0 [Column[_col20]]
-                1 [Column[key]]
-              Position of Big Table: 0
-
-  Stage: Stage-11
-    Map Reduce
-      Alias -> Map Operator Tree:
-        $INTNAME 
-            Map Join Operator
-              condition map:
-                   Left Outer Join0 to 1
-              condition expressions:
-                0 
-                1 
-              handleSkewJoin: false
-              keys:
-                0 [Column[_col20]]
-                1 [Column[key]]
+                6 [Column[key]]
+                7 [Column[key]]
               Position of Big Table: 0
               Select Operator
                 Group By Operator
@@ -716,25 +604,12 @@ STAGE PLANS:
                   bucketGroup: false
                   mode: hash
                   outputColumnNames: _col0
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 0
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-      Local Work:
-        Map Reduce Local Work
-
-  Stage: Stage-3
-    Map Reduce
-      Alias -> Map Operator Tree:
-#### A masked pattern was here ####
-            Reduce Output Operator
-              sort order: 
-              tag: -1
-              value expressions:
-                    expr: _col0
-                    type: bigint
+                  Reduce Output Operator
+                    sort order: 
+                    tag: -1
+                    value expressions:
+                          expr: _col0
+                          type: bigint
       Reduce Operator Tree:
         Group By Operator
           aggregations:



Mime
View raw message