Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 6179FF571 for ; Tue, 26 Mar 2013 06:45:09 +0000 (UTC) Received: (qmail 56893 invoked by uid 500); 26 Mar 2013 06:45:08 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 56625 invoked by uid 500); 26 Mar 2013 06:45:04 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 56569 invoked by uid 99); 26 Mar 2013 06:45:02 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 26 Mar 2013 06:45:02 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 26 Mar 2013 06:44:57 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 35BCA2388906; Tue, 26 Mar 2013 06:44:36 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1460988 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/ java/org/apache/hadoop/hive/ql/exec/ java/org/apache/hadoop/hive/ql/optimizer/ java/org/apache/hadoop/hive/ql/parse/ test/results/clientpositive/ Date: Tue, 26 Mar 2013 06:44:35 -0000 To: commits@hive.apache.org From: namit@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20130326064436.35BCA2388906@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: namit Date: Tue Mar 26 06:44:35 2013 New Revision: 1460988 URL: http://svn.apache.org/r1460988 Log: HIVE-4212 sort merge join should work for outer joins for more than 8 inputs (Namit via Gang Tim Liu) Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_17.q.out Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java?rev=1460988&r1=1460987&r2=1460988&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java Tue Mar 26 06:44:35 2013 @@ -236,8 +236,8 @@ public enum ErrorMsg { "Fix the metadata or don't use bucketed mapjoin, by setting " + "hive.enforce.bucketmapjoin to false."), - JOINNODE_OUTERJOIN_MORETHAN_8(10142, "Single join node containing outer join(s) " + - "cannot have more than 8 aliases"), + JOINNODE_OUTERJOIN_MORETHAN_32(10142, "Single join node containing outer join(s) " + + "cannot have more than 32 aliases"), INVALID_JDO_FILTER_EXPRESSION(10043, "Invalid expression for JDO filter"), Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java?rev=1460988&r1=1460987&r2=1460988&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java Tue Mar 26 06:44:35 2013 @@ -36,7 +36,7 @@ import org.apache.hadoop.hive.ql.metadat import org.apache.hadoop.hive.ql.plan.JoinCondDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; -import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @@ -273,18 +273,15 @@ public abstract class CommonJoinOperator for (Byte alias : order) { ArrayList rcOIs = new ArrayList(); rcOIs.addAll(joinValuesObjectInspectors[alias]); - // for each alias, add object inspector for boolean as the last element + // for each alias, add object inspector for short as the last element rcOIs.add( - PrimitiveObjectInspectorFactory.writableByteObjectInspector); + PrimitiveObjectInspectorFactory.writableShortObjectInspector); rowContainerObjectInspectors[alias] = rcOIs; } rowContainerStandardObjectInspectors = JoinUtil.getStandardObjectInspectors(rowContainerObjectInspectors,NOTSKIPBIGTABLE, tagLen); } - - - dummyObj = new Object[numAliases]; dummyObjVectors = new RowContainer[numAliases]; @@ -309,7 +306,7 @@ public abstract class CommonJoinOperator // add whether the row is filtered or not // this value does not matter for the dummyObj // because the join values are already null - nr.add(new ByteWritable()); + nr.add(new ShortWritable()); } dummyObj[pos] = nr; // there should be only 1 dummy object in the RowContainer @@ -324,7 +321,7 @@ public abstract class CommonJoinOperator // e.g., the output columns does not contains the input table RowContainer rc = JoinUtil.getRowContainer(hconf, rowContainerStandardObjectInspectors[pos], - alias, joinCacheSize,spillTableDesc, conf, !hasFilter(pos), reporter); + alias, joinCacheSize, spillTableDesc, conf, !hasFilter(pos), reporter); storage[pos] = rc; pos++; @@ -861,7 +858,7 @@ transient boolean newGroupStarted = fals // returns filter result of left object by filters associated with right alias private boolean isLeftFiltered(int left, int right, List leftObj) { if (joinValues[order[left]].size() < leftObj.size()) { - ByteWritable filter = (ByteWritable) leftObj.get(leftObj.size() - 1); + ShortWritable filter = (ShortWritable) leftObj.get(leftObj.size() - 1); return JoinUtil.isFiltered(filter.get(), right); } return false; @@ -870,7 +867,7 @@ transient boolean newGroupStarted = fals // returns filter result of right object by filters associated with left alias private boolean isRightFiltered(int left, int right, List rightObj) { if (joinValues[order[right]].size() < rightObj.size()) { - ByteWritable filter = (ByteWritable) rightObj.get(rightObj.size() - 1); + ShortWritable filter = (ShortWritable) rightObj.get(rightObj.size() - 1); return JoinUtil.isFiltered(filter.get(), left); } return false; @@ -879,7 +876,8 @@ transient boolean newGroupStarted = fals // returns object has any filtered tag private boolean hasAnyFiltered(int alias, List row) { return row == dummyObj[alias] || - hasFilter(alias) && JoinUtil.hasAnyFiltered(((ByteWritable) row.get(row.size() - 1)).get()); + hasFilter(alias) && + JoinUtil.hasAnyFiltered(((ShortWritable) row.get(row.size() - 1)).get()); } protected final boolean hasFilter(int alias) { Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java?rev=1460988&r1=1460987&r2=1460988&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java Tue Mar 26 06:44:35 2013 @@ -231,7 +231,7 @@ public class HashTableSinkOperator exten if (filterMap != null && filterMap[alias] != null) { // for each alias, add object inspector for filter tag as the last element rcOIs = new ArrayList(rcOIs); - rcOIs.add(PrimitiveObjectInspectorFactory.writableByteObjectInspector); + rcOIs.add(PrimitiveObjectInspectorFactory.writableShortObjectInspector); } rowContainerObjectInspectors[alias] = rcOIs; } Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java?rev=1460988&r1=1460987&r2=1460988&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java Tue Mar 26 06:44:35 2013 @@ -35,15 +35,15 @@ import org.apache.hadoop.hive.ql.plan.Jo import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hadoop.util.ReflectionUtils; @@ -203,7 +203,7 @@ public class JoinUtil { if (filterMap != null) { nr = new Object[valueFields.size()+1]; // add whether the row is filtered or not. - nr[valueFields.size()] = new ByteWritable(isFiltered(row, filters, filtersOI, filterMap)); + nr[valueFields.size()] = new ShortWritable(isFiltered(row, filters, filtersOI, filterMap)); }else{ nr = new Object[valueFields.size()]; } @@ -235,22 +235,29 @@ public class JoinUtil { } if (filterMap != null) { // add whether the row is filtered or not. - nr.add(new ByteWritable(isFiltered(row, filters, filtersOI, filterMap))); + nr.add(new ShortWritable(isFiltered(row, filters, filtersOI, filterMap))); } return nr; } - private static final byte[] MASKS = new byte[] - {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, (byte) 0x80}; + private static final short[] MASKS; + static { + int num = 32; + MASKS = new short[num]; + MASKS[0] = 1; + for (int idx = 1; idx < num; idx++) { + MASKS[idx] = (short)(2 * MASKS[idx-1]); + } + } /** * Returns true if the row does not pass through filters. */ - protected static byte isFiltered(Object row, List filters, + protected static short isFiltered(Object row, List filters, List ois, int[] filterMap) throws HiveException { // apply join filters on the row. - byte ret = 0; + short ret = 0; int j = 0; for (int i = 0; i < filterMap.length; i += 2) { int tag = filterMap[i]; @@ -274,11 +281,11 @@ public class JoinUtil { return ret; } - protected static boolean isFiltered(byte filter, int tag) { + protected static boolean isFiltered(short filter, int tag) { return (filter & MASKS[tag]) != 0; } - protected static boolean hasAnyFiltered(byte tag) { + protected static boolean hasAnyFiltered(short tag) { return tag != 0; } @@ -330,7 +337,7 @@ public class JoinUtil { if (!noFilter) { colNames.append("filtered"); colNames.append(','); - colTypes.append(TypeInfoFactory.byteTypeInfo.getTypeName()); + colTypes.append(TypeInfoFactory.shortTypeInfo.getTypeName()); colTypes.append(','); } // remove the last ',' Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java?rev=1460988&r1=1460987&r2=1460988&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java Tue Mar 26 06:44:35 2013 @@ -439,7 +439,7 @@ public class MapJoinProcessor implements } if (filterMap != null && filterMap[pos] != null && pos != mapJoinPos) { ExprNodeColumnDesc isFilterDesc = new ExprNodeColumnDesc(TypeInfoFactory - .getPrimitiveTypeInfo(serdeConstants.TINYINT_TYPE_NAME), "filter", "filter", false); + .getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME), "filter", "filter", false); valueFilteredCols.add(isFilterDesc); } Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1460988&r1=1460987&r2=1460988&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Tue Mar 26 06:44:35 2013 @@ -6580,8 +6580,8 @@ public class SemanticAnalyzer extends Ba } if (!node.getNoOuterJoin() || !target.getNoOuterJoin()) { // todo 8 way could be not enough number - if (node.getLeftAliases().length + node.getRightAliases().length + 1 >= 8) { - LOG.info(ErrorMsg.JOINNODE_OUTERJOIN_MORETHAN_8); + if (node.getLeftAliases().length + node.getRightAliases().length + 1 >= 32) { + LOG.info(ErrorMsg.JOINNODE_OUTERJOIN_MORETHAN_32); return false; } } Modified: hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_17.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_17.q.out?rev=1460988&r1=1460987&r2=1460988&view=diff ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_17.q.out (original) +++ hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_17.q.out Tue Mar 26 06:44:35 2013 @@ -397,7 +397,6 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -414,13 +413,15 @@ STAGE PLANS: Left Outer Join0 to 3 Left Outer Join0 to 4 Left Outer Join0 to 5 + Left Outer Join0 to 6 condition expressions: - 0 {key} + 0 1 2 3 4 5 + 6 handleSkewJoin: false keys: 0 [Column[key]] @@ -429,61 +430,21 @@ STAGE PLANS: 3 [Column[key]] 4 [Column[key]] 5 [Column[key]] - outputColumnNames: _col0 + 6 [Column[key]] Position of Big Table: 0 - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 0 - g - TableScan - alias: g - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 - 1 - handleSkewJoin: false - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -598,9 +559,6 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-12 depends on stages: Stage-1 - Stage-11 depends on stages: Stage-12 - Stage-3 depends on stages: Stage-11 Stage-0 is a root stage STAGE PLANS: @@ -617,13 +575,17 @@ STAGE PLANS: Left Outer Join0 to 3 Left Outer Join0 to 4 Left Outer Join0 to 5 + Left Outer Join0 to 6 + Left Outer Join0 to 7 condition expressions: - 0 {key} + 0 1 2 3 4 5 + 6 + 7 handleSkewJoin: false keys: 0 [Column[key]] @@ -632,82 +594,8 @@ STAGE PLANS: 3 [Column[key]] 4 [Column[key]] 5 [Column[key]] - outputColumnNames: _col0 - Position of Big Table: 0 - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 0 - value expressions: - expr: _col0 - type: int - g - TableScan - alias: g - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {VALUE._col20} - 1 - handleSkewJoin: false - outputColumnNames: _col20 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-12 - Map Reduce Local Work - Alias -> Map Local Tables: - h - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - h - TableScan - alias: h - HashTable Sink Operator - condition expressions: - 0 - 1 - handleSkewJoin: false - keys: - 0 [Column[_col20]] - 1 [Column[key]] - Position of Big Table: 0 - - Stage: Stage-11 - Map Reduce - Alias -> Map Operator Tree: - $INTNAME - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 - 1 - handleSkewJoin: false - keys: - 0 [Column[_col20]] - 1 [Column[key]] + 6 [Column[key]] + 7 [Column[key]] Position of Big Table: 0 Select Operator Group By Operator @@ -716,25 +604,12 @@ STAGE PLANS: bucketGroup: false mode: hash outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-3 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: