Return-Path: Delivered-To: apmail-hadoop-hive-commits-archive@minotaur.apache.org Received: (qmail 28436 invoked from network); 16 Mar 2010 23:30:43 -0000 Received: from unknown (HELO mail.apache.org) (140.211.11.3) by 140.211.11.9 with SMTP; 16 Mar 2010 23:30:43 -0000 Received: (qmail 30110 invoked by uid 500); 16 Mar 2010 23:30:43 -0000 Delivered-To: apmail-hadoop-hive-commits-archive@hadoop.apache.org Received: (qmail 30052 invoked by uid 500); 16 Mar 2010 23:30:43 -0000 Mailing-List: contact hive-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hadoop.apache.org Delivered-To: mailing list hive-commits@hadoop.apache.org Received: (qmail 30044 invoked by uid 99); 16 Mar 2010 23:30:42 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 16 Mar 2010 23:30:42 +0000 X-ASF-Spam-Status: No, hits=-1001.0 required=10.0 tests=ALL_TRUSTED,AWL X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 16 Mar 2010 23:30:39 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 965BB23888CD; Tue, 16 Mar 2010 23:30:18 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r924048 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/ Date: Tue, 16 Mar 2010 23:30:18 -0000 To: hive-commits@hadoop.apache.org From: namit@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20100316233018.965BB23888CD@eris.apache.org> Author: namit Date: Tue Mar 16 23:30:17 2010 New Revision: 924048 URL: http://svn.apache.org/viewvc?rev=924048&view=rev Log: HIVE-1246. 
Map join followed by another map join runs in a single map-only job (He Yongqiang via namit) Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin_subquery.q hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin_subquery.q.out Modified: hadoop/hive/trunk/CHANGES.txt hadoop/hive/trunk/build-common.xml hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java hadoop/hive/trunk/ql/src/test/results/clientpositive/join28.q.out Modified: hadoop/hive/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=924048&r1=924047&r2=924048&view=diff ============================================================================== --- hadoop/hive/trunk/CHANGES.txt (original) +++ hadoop/hive/trunk/CHANGES.txt Tue Mar 16 23:30:17 2010 @@ -142,6 +142,9 @@ Trunk - Unreleased HIVE-1216. Show the row with error in mapper/reducer (Zheng Shao via He Yongqiang) + HIVE-1246. 
Map join followed by another map join runs in a single map-only job + (He Yongqiang via namit) + OPTIMIZATIONS BUG FIXES Modified: hadoop/hive/trunk/build-common.xml URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/build-common.xml?rev=924048&r1=924047&r2=924048&view=diff ============================================================================== --- hadoop/hive/trunk/build-common.xml (original) +++ hadoop/hive/trunk/build-common.xml Tue Mar 16 23:30:17 2010 @@ -47,7 +47,7 @@ - + Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java?rev=924048&r1=924047&r2=924048&view=diff ============================================================================== --- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (original) +++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java Tue Mar 16 23:30:17 2010 @@ -21,7 +21,6 @@ package org.apache.hadoop.hive.ql.exec; import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; -import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; @@ -39,10 +38,7 @@ import org.apache.hadoop.hive.ql.plan.ap import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.util.ReflectionUtils; @@ -119,7 +115,7 @@ public class MapJoinOperator extends Abs transient int 
metadataKeyTag; transient int[] metadataValueTag; transient int maxMapJoinSize; - + public MapJoinOperator() { } @@ -130,7 +126,7 @@ public class MapJoinOperator extends Abs @Override protected void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); - + maxMapJoinSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEMAXMAPJOINSIZE); @@ -312,15 +308,17 @@ public class MapJoinOperator extends Abs @Override public void closeOp(boolean abort) throws HiveException { - for (HashMapWrapper hashTable : mapJoinTables.values()) { - hashTable.close(); + if(mapJoinTables != null) { + for (HashMapWrapper hashTable : mapJoinTables.values()) { + hashTable.close(); + } } super.closeOp(abort); } - + /** * Implements the getName function for the Node Interface. - * + * * @return the name of the operator */ @Override Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=924048&r1=924047&r2=924048&view=diff ============================================================================== --- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (original) +++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java Tue Mar 16 23:30:17 2010 @@ -35,7 +35,6 @@ import org.apache.hadoop.hive.conf.HiveC import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; @@ -62,7 +61,6 @@ import org.apache.hadoop.hive.ql.plan.Fe import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import 
org.apache.hadoop.hive.ql.plan.MapredLocalWork; -import org.apache.hadoop.hive.ql.plan.MapredLocalWork.BucketMapJoinContext; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.PlanUtils; @@ -70,6 +68,7 @@ import org.apache.hadoop.hive.ql.plan.Re import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc; +import org.apache.hadoop.hive.ql.plan.MapredLocalWork.BucketMapJoinContext; /** * General utility common functions for the Processor to convert operator into @@ -84,7 +83,7 @@ public final class GenMapRedUtils { /** * Initialize the current plan by adding it to root tasks. - * + * * @param op * the reduce sink operator encountered * @param opProcCtx @@ -130,9 +129,15 @@ public final class GenMapRedUtils { opProcCtx.setCurrAliasId(currAliasId); } + public static void initMapJoinPlan( + Operator op, GenMRProcContext ctx, + boolean readInputMapJoin, boolean readInputUnion, boolean setReducer, int pos) throws SemanticException { + initMapJoinPlan(op, ctx, readInputMapJoin, readInputUnion, setReducer, pos, false); + } + /** * Initialize the current plan by adding it to root tasks. - * + * * @param op * the map join operator encountered * @param opProcCtx @@ -142,7 +147,7 @@ public final class GenMapRedUtils { */ public static void initMapJoinPlan(Operator op, GenMRProcContext opProcCtx, boolean readInputMapJoin, - boolean readInputUnion, boolean setReducer, int pos) + boolean readInputUnion, boolean setReducer, int pos, boolean createLocalPlan) throws SemanticException { Map, GenMapRedCtx> mapCurrCtx = opProcCtx .getMapCurrCtx(); @@ -159,7 +164,7 @@ public final class GenMapRedUtils { // The mapjoin has already been encountered. 
Some context must be stored // about that if (readInputMapJoin) { - AbstractMapJoinOperator currMapJoinOp = (AbstractMapJoinOperator) opProcCtx.getCurrMapJoinOp(); + AbstractMapJoinOperator currMapJoinOp = opProcCtx.getCurrMapJoinOp(); assert currMapJoinOp != null; boolean local = ((pos == -1) || (pos == (currMapJoinOp.getConf()) .getPosBigTable())) ? false : true; @@ -197,7 +202,7 @@ public final class GenMapRedUtils { } setTaskPlan(taskTmpDir, taskTmpDir, rootOp, plan, local, tt_desc); - setupBucketMapJoinInfo(plan, currMapJoinOp); + setupBucketMapJoinInfo(plan, currMapJoinOp, createLocalPlan); } else { initUnionPlan(opProcCtx, currTask, false); } @@ -219,7 +224,7 @@ public final class GenMapRedUtils { seenOps.add(currTopOp); boolean local = (pos == desc.getPosBigTable()) ? false : true; setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); - setupBucketMapJoinInfo(plan, (AbstractMapJoinOperator)op); + setupBucketMapJoinInfo(plan, (AbstractMapJoinOperator)op, createLocalPlan); } opProcCtx.setCurrTask(currTask); @@ -228,9 +233,9 @@ public final class GenMapRedUtils { } private static void setupBucketMapJoinInfo(MapredWork plan, - AbstractMapJoinOperator currMapJoinOp) { + AbstractMapJoinOperator currMapJoinOp, boolean createLocalPlan) { if (currMapJoinOp != null) { - LinkedHashMap>> aliasBucketFileNameMapping = + LinkedHashMap>> aliasBucketFileNameMapping = currMapJoinOp.getConf().getAliasBucketFileNameMapping(); if(aliasBucketFileNameMapping!= null) { MapredLocalWork localPlan = plan.getMapLocalWork(); @@ -238,7 +243,7 @@ public final class GenMapRedUtils { if(currMapJoinOp instanceof SMBMapJoinOperator) { localPlan = ((SMBMapJoinOperator)currMapJoinOp).getConf().getLocalWork(); } - if (localPlan == null) { + if (localPlan == null && createLocalPlan) { localPlan = new MapredLocalWork( new LinkedHashMap>(), new LinkedHashMap()); @@ -253,6 +258,11 @@ public final class GenMapRedUtils { } } } + + if(localPlan == null) { + return; + } + if(currMapJoinOp 
instanceof SMBMapJoinOperator) { plan.setMapLocalWork(null); ((SMBMapJoinOperator)currMapJoinOp).getConf().setLocalWork(localPlan); @@ -271,7 +281,7 @@ public final class GenMapRedUtils { /** * Initialize the current union plan. - * + * * @param op * the reduce sink operator encountered * @param opProcCtx @@ -333,9 +343,17 @@ public final class GenMapRedUtils { } } + + public static void joinPlan(Operator op, + Task oldTask, Task task, + GenMRProcContext opProcCtx, int pos, boolean split, + boolean readMapJoinData, boolean readUnionData) throws SemanticException { + joinPlan(op, oldTask, task, opProcCtx, pos, split, readMapJoinData, readUnionData, false); + } + /** * Merge the current task with the task for the current reducer. - * + * * @param op * operator being processed * @param oldTask @@ -350,7 +368,7 @@ public final class GenMapRedUtils { public static void joinPlan(Operator op, Task oldTask, Task task, GenMRProcContext opProcCtx, int pos, boolean split, - boolean readMapJoinData, boolean readUnionData) throws SemanticException { + boolean readMapJoinData, boolean readUnionData, boolean createLocalWork) throws SemanticException { Task currTask = task; MapredWork plan = (MapredWork) currTask.getWork(); Operator currTopOp = opProcCtx.getCurrTopOp(); @@ -386,13 +404,13 @@ public final class GenMapRedUtils { } setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); if(op instanceof AbstractMapJoinOperator) { - setupBucketMapJoinInfo(plan, (AbstractMapJoinOperator)op); + setupBucketMapJoinInfo(plan, (AbstractMapJoinOperator)op, createLocalWork); } } currTopOp = null; opProcCtx.setCurrTopOp(currTopOp); } else if (opProcCtx.getCurrMapJoinOp() != null) { - AbstractMapJoinOperator mjOp = (AbstractMapJoinOperator) opProcCtx.getCurrMapJoinOp(); + AbstractMapJoinOperator mjOp = opProcCtx.getCurrMapJoinOp(); if (readUnionData) { initUnionPlan(opProcCtx, currTask, false); } else { @@ -400,15 +418,20 @@ public final class GenMapRedUtils { // In case of map-join 
followed by map-join, the file needs to be // obtained from the old map join - AbstractMapJoinOperator oldMapJoin = (AbstractMapJoinOperator) mjCtx.getOldMapJoin(); + AbstractMapJoinOperator oldMapJoin = mjCtx.getOldMapJoin(); String taskTmpDir = null; TableDesc tt_desc = null; Operator rootOp = null; + boolean local = ((pos == -1) || (pos == (mjOp.getConf()) + .getPosBigTable())) ? false : true; if (oldMapJoin == null) { - taskTmpDir = mjCtx.getTaskTmpDir(); - tt_desc = mjCtx.getTTDesc(); - rootOp = mjCtx.getRootMapJoinOp(); + if (opProcCtx.getParseCtx().getListMapJoinOpsNoReducer().contains(mjOp) + || local || (oldTask != null) && (parTasks != null)) { + taskTmpDir = mjCtx.getTaskTmpDir(); + tt_desc = mjCtx.getTTDesc(); + rootOp = mjCtx.getRootMapJoinOp(); + } } else { GenMRMapJoinCtx oldMjCtx = opProcCtx.getMapJoinCtx(oldMapJoin); assert oldMjCtx != null; @@ -417,22 +440,20 @@ public final class GenMapRedUtils { rootOp = oldMjCtx.getRootMapJoinOp(); } - boolean local = ((pos == -1) || (pos == (mjOp.getConf()) - .getPosBigTable())) ? false : true; setTaskPlan(taskTmpDir, taskTmpDir, rootOp, plan, local, tt_desc); - setupBucketMapJoinInfo(plan, oldMapJoin); + setupBucketMapJoinInfo(plan, oldMapJoin, createLocalWork); } opProcCtx.setCurrMapJoinOp(null); if ((oldTask != null) && (parTasks != null)) { for (Task parTask : parTasks) { parTask.addDependentTask(currTask); + if(opProcCtx.getRootTasks().contains(currTask)) { + opProcCtx.getRootTasks().remove(currTask); + } } } - if (opProcCtx.getRootTasks().contains(currTask)) { - opProcCtx.getRootTasks().remove(currTask); - } } opProcCtx.setCurrTask(currTask); @@ -440,7 +461,7 @@ public final class GenMapRedUtils { /** * Split the current plan by creating a temporary destination. - * + * * @param op * the reduce sink operator encountered * @param opProcCtx @@ -472,7 +493,7 @@ public final class GenMapRedUtils { /** * set the current task in the mapredWork. 
- * + * * @param alias_id * current alias * @param topOp @@ -631,7 +652,7 @@ public final class GenMapRedUtils { /** * set the current task in the mapredWork. - * + * * @param alias * current alias * @param topOp @@ -647,6 +668,10 @@ public final class GenMapRedUtils { Operator topOp, MapredWork plan, boolean local, TableDesc tt_desc) throws SemanticException { + if(path == null || alias == null) { + return; + } + if (!local) { if (plan.getPathToAliases().get(path) == null) { plan.getPathToAliases().put(path, new ArrayList()); @@ -673,7 +698,7 @@ public final class GenMapRedUtils { /** * set key and value descriptor. - * + * * @param plan * current plan * @param topOp @@ -707,7 +732,7 @@ public final class GenMapRedUtils { /** * create a new plan and return. - * + * * @return the new plan */ public static MapredWork getMapRedWork() { @@ -723,7 +748,7 @@ public final class GenMapRedUtils { /** * insert in the map for the operator to row resolver. - * + * * @param op * operator created * @param rr @@ -857,7 +882,7 @@ public final class GenMapRedUtils { opProcCtx.setMapJoinCtx(mjOp, mjCtx); opProcCtx.getMapCurrCtx().put(parent, new GenMapRedCtx(childTask, null, null)); - setupBucketMapJoinInfo(cplan, mjOp); + setupBucketMapJoinInfo(cplan, mjOp, false); } currTopOp = null; @@ -937,4 +962,5 @@ public final class GenMapRedUtils { private GenMapRedUtils() { // prevent instantiation } + } Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java?rev=924048&r1=924047&r2=924048&view=diff ============================================================================== --- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java (original) +++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java Tue Mar 16 23:30:17 2010 @@ -27,7 +27,6 @@ import java.util.Stack; 
import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.SelectOperator; @@ -135,7 +134,7 @@ public final class MapJoinFactory { // find the branch on which this processor was invoked int pos = getPositionParent(mapJoin, stack); - boolean local = (pos == ((MapJoinDesc)(mapJoin.getConf())).getPosBigTable()) ? false + boolean local = (pos == ((mapJoin.getConf())).getPosBigTable()) ? false : true; GenMapRedUtils.splitTasks(mapJoin, currTask, redTask, opProcCtx, false, @@ -270,7 +269,7 @@ public final class MapJoinFactory { ctx.getParseCtx(); AbstractMapJoinOperator oldMapJoin = ctx.getCurrMapJoinOp(); - assert oldMapJoin != null; + GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mapJoin); if (mjCtx != null) { mjCtx.setOldMapJoin(oldMapJoin); Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java?rev=924048&r1=924047&r2=924048&view=diff ============================================================================== --- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (original) +++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java Tue Mar 16 23:30:17 2010 @@ -23,6 +23,7 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; @@ -31,18 +32,19 @@ import java.util.Stack; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; 
-import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.ColumnInfo; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; +import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.exec.ScriptOperator; import org.apache.hadoop.hive.ql.exec.SelectOperator; -import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; import org.apache.hadoop.hive.ql.lib.GraphWalker; @@ -51,29 +53,21 @@ import org.apache.hadoop.hive.ql.lib.Nod import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; -import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.ErrorMsg; import org.apache.hadoop.hive.ql.parse.GenMapRedWalker; import org.apache.hadoop.hive.ql.parse.OpParseContext; import org.apache.hadoop.hive.ql.parse.ParseContext; -import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.QBJoinTree; import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; import 
org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; /** * Implementation of one of the rule-based map join optimization. User passes @@ -83,9 +77,9 @@ import org.apache.hadoop.hive.ql.udf.gen * implemented, this transformation can also be done based on costs. */ public class MapJoinProcessor implements Transform { - + private static final Log LOG = LogFactory.getLog(MapJoinProcessor.class.getName()); - + private ParseContext pGraphContext; /** @@ -102,10 +96,10 @@ public class MapJoinProcessor implements pGraphContext.getOpParseCtx().put(op, ctx); return op; } - + /** * convert a regular join to a a map-side join. - * + * * @param op * join operator * @param qbJoin @@ -255,7 +249,7 @@ public class MapJoinProcessor implements keyTableDesc, valueExprMap, valueTableDescs, outputColumnNames, mapJoinPos, joinCondns), new RowSchema(outputRS.getColumnInfos()), newPar), outputRS); - + mapJoinOp.getConf().setReversedExprs(op.getConf().getReversedExprs()); mapJoinOp.setColumnExprMap(colExprMap); @@ -340,7 +334,7 @@ public class MapJoinProcessor implements /** * Is it a map-side join. - * + * * @param op * join operator * @param qbJoin @@ -386,7 +380,7 @@ public class MapJoinProcessor implements /** * Transform the query tree. For each join, check if it is a map-side join * (user specified). If yes, convert it to a map-side join. 
- * + * * @param pactx * current parse context */ @@ -438,15 +432,13 @@ public class MapJoinProcessor implements getMapJoinFS()); opRules.put(new RuleRegExp(new String("R2"), "MAPJOIN%.*RS%"), getMapJoinDefault()); - opRules.put(new RuleRegExp(new String("R3"), "MAPJOIN%.*MAPJOIN%"), - getMapJoinDefault()); opRules.put(new RuleRegExp(new String("R4"), "MAPJOIN%.*UNION%"), getMapJoinDefault()); // The dispatcher fires the processor corresponding to the closest matching // rule and passes the context along Dispatcher disp = new DefaultRuleDispatcher(getDefault(), opRules, - new MapJoinWalkerCtx(listMapJoinOpsNoRed)); + new MapJoinWalkerCtx(listMapJoinOpsNoRed, pGraphContext)); GraphWalker ogw = new GenMapRedWalker(disp); ArrayList topNodes = new ArrayList(); @@ -472,9 +464,129 @@ public class MapJoinProcessor implements MapJoinWalkerCtx ctx = (MapJoinWalkerCtx) procCtx; MapJoinOperator mapJoin = (MapJoinOperator) nd; - ctx.setCurrMapJoinOp(mapJoin); + if (ctx.getListRejectedMapJoins() != null && !ctx.getListRejectedMapJoins().contains(mapJoin)) { + //for rule: MapJoin%.*MapJoin + // have a child mapjoin. if the the current mapjoin is on a local work, + // will put the current mapjoin in the rejected list. 
+ Boolean bigBranch = findGrandChildSubqueryMapjoin(ctx, mapJoin); + if (bigBranch == null) { // no child map join + ctx.setCurrMapJoinOp(mapJoin); + return null; + } + if(bigBranch) { + addNoReducerMapJoinToCtx(ctx, mapJoin); + } else { + addRejectMapJoinToCtx(ctx, mapJoin); + } + } else { + ctx.setCurrMapJoinOp(mapJoin); + } return null; } + + private Boolean findGrandChildSubqueryMapjoin(MapJoinWalkerCtx ctx, MapJoinOperator mapJoin) { + Operator parent = mapJoin; + while (true) { + if(parent.getChildOperators() == null || parent.getChildOperators().size() != 1) { + return null; + } + Operator ch = parent.getChildOperators().get(0); + if(ch instanceof MapJoinOperator) { + if (!nonSubqueryMapJoin(ctx.getpGraphContext(), (MapJoinOperator) ch, + mapJoin)) { + if (ch.getParentOperators().indexOf(parent) == ((MapJoinOperator) ch) + .getConf().getPosBigTable()) { + //not come from the local branch + return true; + } + } + return false; // not from a sub-query. + } + + if ((ch instanceof JoinOperator) + || (ch instanceof UnionOperator) + || (ch instanceof ReduceSinkOperator) + || (ch instanceof LateralViewJoinOperator) + || (ch instanceof GroupByOperator) + || (ch instanceof ScriptOperator)) { + return null; + } + + parent = ch; + } + } + + private boolean nonSubqueryMapJoin(ParseContext pGraphContext, + MapJoinOperator mapJoin, MapJoinOperator parentMapJoin) { + QBJoinTree joinTree = pGraphContext.getMapJoinContext().get(mapJoin); + QBJoinTree parentJoinTree = pGraphContext.getMapJoinContext().get(parentMapJoin); + if(joinTree.getJoinSrc() != null && joinTree.getJoinSrc().equals(parentJoinTree)) { + return true; + } + return false; + } + } + + private static void addNoReducerMapJoinToCtx(MapJoinWalkerCtx ctx, + AbstractMapJoinOperator mapJoin) { + if (ctx.getListRejectedMapJoins() != null + && ctx.getListRejectedMapJoins().contains(mapJoin)) { + return; + } + List> listMapJoinsNoRed = ctx.getListMapJoinsNoRed(); + if (listMapJoinsNoRed == null) { + listMapJoinsNoRed = 
new ArrayList>(); + } + if (!listMapJoinsNoRed.contains(mapJoin)) { + listMapJoinsNoRed.add(mapJoin); + } + ctx.setListMapJoins(listMapJoinsNoRed); + } + + private static void addRejectMapJoinToCtx(MapJoinWalkerCtx ctx, + AbstractMapJoinOperator mapjoin) { + // current map join is null means it has been handled by CurrentMapJoin + // process. + if(mapjoin == null) { + return; + } + List> listRejectedMapJoins = ctx.getListRejectedMapJoins(); + if (listRejectedMapJoins == null) { + listRejectedMapJoins = new ArrayList>(); + } + if (!listRejectedMapJoins.contains(mapjoin)) { + listRejectedMapJoins.add(mapjoin); + } + + if (ctx.getListMapJoinsNoRed() != null + && ctx.getListMapJoinsNoRed().contains(mapjoin)) { + ctx.getListMapJoinsNoRed().remove(mapjoin); + } + + ctx.setListRejectedMapJoins(listRejectedMapJoins); + } + + private static int findGrandparentBranch(Operator currOp, Operator grandParent) { + int pos = -1; + for (int i = 0; i < currOp.getParentOperators().size(); i++) { + List> parentOpList = new LinkedList>(); + parentOpList.add(currOp.getParentOperators().get(i)); + boolean found = false; + while (!parentOpList.isEmpty()) { + Operator p = parentOpList.remove(0); + if(p == grandParent) { + found = true; + break; + } else if (p.getParentOperators() != null){ + parentOpList.addAll(p.getParentOperators()); + } + } + if(found) { + pos = i; + break; + } + } + return pos; } /** @@ -500,13 +612,7 @@ public class MapJoinProcessor implements && (listRejectedMapJoins.contains(mapJoin))) { return null; } - - List> listMapJoinsNoRed = ctx.getListMapJoinsNoRed(); - if (listMapJoinsNoRed == null) { - listMapJoinsNoRed = new ArrayList>(); - } - listMapJoinsNoRed.add(mapJoin); - ctx.setListMapJoins(listMapJoinsNoRed); + addNoReducerMapJoinToCtx(ctx, mapJoin); return null; } } @@ -525,13 +631,7 @@ public class MapJoinProcessor implements Object... 
nodeOutputs) throws SemanticException { MapJoinWalkerCtx ctx = (MapJoinWalkerCtx) procCtx; AbstractMapJoinOperator mapJoin = ctx.getCurrMapJoinOp(); - List> listRejectedMapJoins = ctx - .getListRejectedMapJoins(); - if (listRejectedMapJoins == null) { - listRejectedMapJoins = new ArrayList>(); - } - listRejectedMapJoins.add(mapJoin); - ctx.setListRejectedMapJoins(listRejectedMapJoins); + addRejectMapJoinToCtx(ctx, mapJoin); return null; } } @@ -573,17 +673,21 @@ public class MapJoinProcessor implements * */ public static class MapJoinWalkerCtx implements NodeProcessorCtx { + + private ParseContext pGraphContext; private List> listMapJoinsNoRed; private List> listRejectedMapJoins; private AbstractMapJoinOperator currMapJoinOp; /** * @param listMapJoinsNoRed + * @param pGraphContext2 */ - public MapJoinWalkerCtx(List> listMapJoinsNoRed) { + public MapJoinWalkerCtx(List> listMapJoinsNoRed, ParseContext pGraphContext) { this.listMapJoinsNoRed = listMapJoinsNoRed; currMapJoinOp = null; listRejectedMapJoins = new ArrayList>(); + this.pGraphContext = pGraphContext; } /** @@ -631,5 +735,14 @@ public class MapJoinProcessor implements List> listRejectedMapJoins) { this.listRejectedMapJoins = listRejectedMapJoins; } + + public ParseContext getpGraphContext() { + return pGraphContext; + } + + public void setpGraphContext(ParseContext pGraphContext) { + this.pGraphContext = pGraphContext; + } + } } Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin_subquery.q URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin_subquery.q?rev=924048&view=auto ============================================================================== --- hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin_subquery.q (added) +++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin_subquery.q Tue Mar 16 23:30:17 2010 @@ -0,0 +1,28 @@ +EXPLAIN +SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +FROM +(SELECT /*+ MAPJOIN(x) */ x.key as key1, 
x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11); + +SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +FROM +(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11); + +EXPLAIN +SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +FROM +(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) + order by subq.key1; + + +SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +FROM +(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) + order by subq.key1; Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/join28.q.out URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/join28.q.out?rev=924048&r1=924047&r2=924048&view=diff ============================================================================== --- hadoop/hive/trunk/ql/src/test/results/clientpositive/join28.q.out (original) +++ hadoop/hive/trunk/ql/src/test/results/clientpositive/join28.q.out Tue Mar 16 23:30:17 2010 @@ -28,11 +28,10 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-5 depends on stages: Stage-2 , consists of Stage-4, Stage-3 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3 + Stage-4 depends on stages: Stage-1 , consists of Stage-3, Stage-2 Stage-3 + Stage-0 depends on stages: Stage-3, Stage-2 + Stage-2 STAGE PLANS: Stage: Stage-1 @@ -53,18 +52,59 @@ STAGE PLANS: 1 [Column[key]] outputColumnNames: _col0 
Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col0, _col5 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col5 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest_j1 Local Work: Map Reduce Local Work Alias -> Map Local Tables: subq:x Fetch Operator limit: -1 + z + Fetch Operator + limit: -1 Alias -> Map Local Operator Tree: subq:x TableScan @@ -81,68 +121,50 @@ STAGE PLANS: 1 [Column[key]] outputColumnNames: _col0 Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: - file:/data/users/heyongqiang/hive-trunk/.ptest_0/build/ql/tmp/296504271/10002 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - 
Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[key]] - outputColumnNames: _col0, _col5 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col5 Select Operator expressions: expr: _col0 type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: dest_j1 - Local Work: - Map Reduce Local Work - Alias -> Map Local Tables: - z - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col0, _col5 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col5 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest_j1 z TableScan alias: z @@ -193,14 +215,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: dest_j1 - Stage: Stage-5 + Stage: Stage-4 Conditional Operator - Stage: Stage-4 + Stage: Stage-3 
Move Operator files: hdfs directory: true - destination: file:/data/users/heyongqiang/hive-trunk/.ptest_0/build/ql/tmp/562095980/10000 + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-15_16-22-31_248_3138742764068185194/10000 Stage: Stage-0 Move Operator @@ -212,10 +234,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: dest_j1 - Stage: Stage-3 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/data/users/heyongqiang/hive-trunk/.ptest_0/build/ql/tmp/296504271/10003 + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-15_16-22-31_248_3138742764068185194/10002 Reduce Output Operator sort order: Map-reduce partition columns: @@ -264,11 +286,11 @@ POSTHOOK: Output: default@dest_j1 PREHOOK: query: select * from dest_j1 x order by x.key PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 -PREHOOK: Output: file:/data/users/heyongqiang/hive-trunk/.ptest_0/build/ql/tmp/1591031497/10000 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-15_16-22-38_314_5886463496780818123/10000 POSTHOOK: query: select * from dest_j1 x order by x.key POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_j1 -POSTHOOK: Output: file:/data/users/heyongqiang/hive-trunk/.ptest_0/build/ql/tmp/1591031497/10000 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-15_16-22-38_314_5886463496780818123/10000 128 val_128 128 val_128 128 val_128 Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin_subquery.q.out URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin_subquery.q.out?rev=924048&view=auto ============================================================================== --- hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin_subquery.q.out (added) +++ 
hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin_subquery.q.out Tue Mar 16 23:30:17 2010 @@ -0,0 +1,659 @@ +PREHOOK: query: EXPLAIN +SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +FROM +(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +FROM +(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF src1 x) (TOK_TABREF src y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value2)))) subq) (TOK_TABREF srcpart z) (and (and (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) key)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST z))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL z) value))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + subq:y + TableScan + alias: y + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col0, _col5 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col5 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + subq:x + Fetch Operator + limit: -1 + z + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq:x + TableScan + alias: x + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 
0 {_col0} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col0, _col5 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col5 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + z + TableScan + alias: z + Filter Operator + predicate: + expr: ((ds = '2008-04-08') and (hr = 11)) + type: boolean + Filter Operator + predicate: + expr: (ds = '2008-04-08') + type: boolean + Filter Operator + predicate: + expr: (hr = 11) + type: boolean + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col0, _col5 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col5 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +FROM +(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Input: default@src +PREHOOK: Input: 
default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-15_19-27-29_426_1006789162213353166/10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +FROM +(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-15_19-27-29_426_1006789162213353166/10000 +238 val_238 +238 val_238 +311 val_311 +311 val_311 +311 val_311 +255 val_255 +255 val_255 +278 val_278 +278 val_278 +98 val_98 +98 val_98 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +150 val_150 +273 val_273 +273 val_273 +273 val_273 +224 val_224 +224 val_224 +369 val_369 +369 val_369 +369 val_369 +66 val_66 +128 val_128 +128 val_128 +128 val_128 +213 val_213 +213 val_213 +146 val_146 +146 val_146 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +128 val_128 +128 val_128 +128 val_128 +311 val_311 +311 val_311 +311 val_311 +213 val_213 +213 val_213 +278 val_278 +278 val_278 +311 val_311 +311 val_311 +311 val_311 +98 val_98 +98 val_98 +369 val_369 +369 val_369 +369 val_369 +238 val_238 +238 val_238 +273 val_273 +273 val_273 +273 val_273 +224 val_224 +224 val_224 +369 val_369 +369 val_369 +369 val_369 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +128 val_128 +128 val_128 +128 val_128 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +255 val_255 +255 val_255 +406 val_406 +406 val_406 +406 val_406 +406 
val_406 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +146 val_146 +146 val_146 +273 val_273 +273 val_273 +273 val_273 +PREHOOK: query: EXPLAIN +SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +FROM +(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) + order by subq.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +FROM +(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) + order by subq.key1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF src1 x) (TOK_TABREF src y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value2)))) subq) (TOK_TABREF srcpart z) (and (and (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) key)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST z))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. 
(TOK_TABLE_OR_COL subq) key1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + subq:y + TableScan + alias: y + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col0, _col5 + Position of Big Table: 0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + subq:x + Fetch Operator + limit: -1 + z + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq:x + TableScan + alias: x + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col0, _col5 + Position of Big Table: 0 + File Output Operator + compressed: false + 
GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + z + TableScan + alias: z + Filter Operator + predicate: + expr: ((ds = '2008-04-08') and (hr = 11)) + type: boolean + Filter Operator + predicate: + expr: (ds = '2008-04-08') + type: boolean + Filter Operator + predicate: + expr: (hr = 11) + type: boolean + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col0, _col5 + Position of Big Table: 0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-15_19-27-34_660_8681994720023430294/10002 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col5 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +FROM +(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + 
JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) + order by subq.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-15_19-27-35_605_8033797734197412586/10000 +POSTHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +FROM +(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) + order by subq.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-03-15_19-27-35_605_8033797734197412586/10000 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +150 val_150 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 
val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +66 val_66 +98 val_98 +98 val_98 +98 val_98 +98 val_98