hive-commits mailing list archives

From zs...@apache.org
Subject svn commit: r758494 [1/3] - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/lib/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/ ql/...
Date Thu, 26 Mar 2009 01:26:39 GMT
Author: zshao
Date: Thu Mar 26 01:26:19 2009
New Revision: 758494

URL: http://svn.apache.org/viewvc?rev=758494&view=rev
Log:
HIVE-318. Fix union all queries. (Namit Jain via zshao)

Added:
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/PreOrderWalker.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcContext.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/unionDesc.java
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/union10.q
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/union11.q
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/union12.q
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/union13.q
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/union4.q
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/union5.q
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/union6.q
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/union7.q
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/union8.q
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/union9.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/union10.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/union11.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/union12.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/union13.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/union4.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/union5.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/union6.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/union7.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/union8.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/union9.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultRuleDispatcher.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/NodeProcessor.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMROperator.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/PrintOpTreeProcessor.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/union.q
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/union2.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/cluster.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input_part7.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/union.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/union2.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/union3.q.out
    hadoop/hive/trunk/ql/src/test/results/compiler/plan/union.q.xml

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Thu Mar 26 01:26:19 2009
@@ -22,8 +22,6 @@
 
   BUG FIXES
 
-    HIVE-342. Fix TestMTQueries. (Prasad Chakka via zshao)
-
     HIVE-349. Fix TestCliDriver when there are test cases with no tasks.
     (Suresh Antony via zshao)
 
@@ -81,6 +79,8 @@
     HIVE-342. Fix TestMTQueries
     (Prasad Chakka via namit)
 
+    HIVE-318. Fix union all queries. (Namit Jain via zshao)
+
 Release 0.2.0 - Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java Thu Mar 26 01:26:19 2009
@@ -49,6 +49,13 @@
   protected List<Operator<? extends Serializable>> parentOperators;
   private static int seqId;
 
+  // This can be optimized later so that an operator operation (init/close) is performed
+  // only after that operation has been performed on all of its parents. That will require
+  // initializing the whole tree in all the mappers (which might be required anyway, in the
+  // future, for mappers spanning multiple files)
+  public static enum State { UNINIT, INIT, CLOSE };
+  transient private State state = State.UNINIT;
+
   static {
     seqId = 0;
   }
@@ -213,6 +220,11 @@
   }
 
   public void initialize (Configuration hconf, Reporter reporter) throws HiveException {
+    if (state == State.INIT) {
+      LOG.info("Already Initialized");
+      return;
+    }
+
     LOG.info("Initializing Self");
     this.reporter = reporter;
     
@@ -223,6 +235,9 @@
     for(Operator<? extends Serializable> op: childOperators) {
       op.initialize(hconf, reporter);
     }    
+
+    state = State.INIT;
+
     LOG.info("Initialization Done");
   }
 
@@ -258,18 +273,23 @@
   }
 
   public void close(boolean abort) throws HiveException {
-      try {
-        logStats();
-        if(childOperators == null)
-          return;
-
-        for(Operator<? extends Serializable> op: childOperators) {
-          op.close(abort);
-        }
-      } catch (HiveException e) {
-        e.printStackTrace();
-        throw e;
+    if (state == State.CLOSE)
+      return;
+
+    try {
+      logStats();
+      // mark CLOSE before the early return below, so that operators
+      // without children are recorded as closed as well
+      state = State.CLOSE;
+      if (childOperators == null)
+        return;
+
+      for(Operator<? extends Serializable> op: childOperators) {
+        op.close(abort);
       }
+    } catch (HiveException e) {
+      e.printStackTrace();
+      throw e;
+    }
   }
 
   /**
@@ -319,7 +339,6 @@
     }
   }
 
-
   public static interface OperatorFunc {
     public void func(Operator<? extends Serializable> op);
   }
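
The state guard above matters because the operator graph is a DAG: an operator such as UNION has several parents, and each parent forwards initialize()/close() to its children, so the shared child would otherwise be initialized or closed once per parent. A minimal, self-contained sketch of the idempotency pattern (the Op class below is a hypothetical stand-in, not Hive's Operator):

    import java.util.Arrays;
    import java.util.List;

    class Op {
      enum State { UNINIT, INIT, CLOSE }
      private State state = State.UNINIT;
      private final String name;
      private final List<Op> children;

      Op(String name, Op... children) {
        this.name = name;
        this.children = Arrays.asList(children);
      }

      void initialize() {
        if (state == State.INIT) {
          System.out.println(name + ": already initialized, skipping");
          return;                      // the second parent's call is a no-op
        }
        System.out.println(name + ": initializing");
        state = State.INIT;
        for (Op child : children) {
          child.initialize();
        }
      }

      public static void main(String[] args) {
        Op union = new Op("UNION");
        Op left  = new Op("TS_left",  union);
        Op right = new Op("TS_right", union);
        left.initialize();             // initializes TS_left and UNION
        right.initialize();            // UNION is skipped the second time
      }
    }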

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java Thu Mar 26 01:26:19 2009
@@ -50,6 +50,7 @@
     opvec.add(new opTuple<joinDesc> (joinDesc.class, JoinOperator.class));
     opvec.add(new opTuple<limitDesc> (limitDesc.class, LimitOperator.class));
     opvec.add(new opTuple<tableScanDesc> (tableScanDesc.class, TableScanOperator.class));
+    opvec.add(new opTuple<unionDesc> (unionDesc.class, UnionOperator.class));
   }
               
 

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java?rev=758494&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java Thu Mar 26 01:26:19 2009
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec;
+
+import java.io.*;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.unionDesc;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * Union Operator
+ * Just forwards. Doesn't do anything itself.
+ **/
+public class UnionOperator extends  Operator<unionDesc>  implements Serializable {
+  private static final long serialVersionUID = 1L;
+
+  @Override
+  public void process(Object row, ObjectInspector rowInspector)
+      throws HiveException {
+    forward(row, rowInspector);    
+  }
+
+  /**
+   * @return the name of the operator
+   */
+  @Override
+  public String getName() {
+    return new String("UNION");
+  }
+}

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultRuleDispatcher.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultRuleDispatcher.java?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultRuleDispatcher.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultRuleDispatcher.java Thu Mar 26 01:26:19 2009
@@ -77,7 +77,7 @@
     // Do nothing in case proc is null
     if (proc != null) {
       // Call the process function
-      return proc.process(nd, procCtx, nodeOutputs);
+      return proc.process(nd, ndStack, procCtx, nodeOutputs);
     }
     else
       return null;

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/NodeProcessor.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/NodeProcessor.java?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/NodeProcessor.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/NodeProcessor.java Thu Mar 26 01:26:19 2009
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.lib;
 
 
+import java.util.Stack;
+
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 
 /**
@@ -34,6 +36,6 @@
    * @return Object to be returned by the process call
    * @throws SemanticException
    */
-  public Object process(Node nd, NodeProcessorCtx procCtx, Object... nodeOutputs) 
+  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) 
     throws SemanticException;
 }
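
The extra Stack<Node> parameter gives a processor the path from the walk's root to the current node, which is what lets union processing determine through which parent branch a UNION was reached (see UnionProcFactory.getPositionParent below). A hedged, self-contained sketch of the pattern (MyNode and MyProcessor are hypothetical stand-ins, not Hive API):

    import java.util.Stack;

    interface MyNode { String getName(); }

    class MyProcessor {
      // analogous in shape to NodeProcessor.process(nd, stack, procCtx, nodeOutputs)
      public Object process(MyNode nd, Stack<MyNode> stack) {
        int size = stack.size();
        // stack.peek() is nd itself; the element below it is the parent we came through
        MyNode parent = (size >= 2) ? stack.get(size - 2) : null;
        System.out.println("visiting " + nd.getName()
            + " via parent " + (parent == null ? "<root>" : parent.getName()));
        return null;
      }

      public static void main(String[] args) {
        MyNode ts = () -> "TS";
        MyNode union = () -> "UNION";
        Stack<MyNode> stack = new Stack<MyNode>();
        stack.push(ts);
        stack.push(union);
        new MyProcessor().process(union, stack);  // visiting UNION via parent TS
      }
    }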

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/PreOrderWalker.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/PreOrderWalker.java?rev=758494&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/PreOrderWalker.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/PreOrderWalker.java Thu Mar 26 01:26:19 2009
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.lib;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.Stack;
+import java.lang.Object;
+
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+
+/**
+ * Pre-order walker over the operator graph.
+ * Walks the graph from the given start nodes, dispatching each node before its children.
+ */
+public class PreOrderWalker extends DefaultGraphWalker {
+
+  /* 
+   * Since the operator tree is a DAG, nodes with multiple parents will be visited more than once.
+   * This can be made configurable.
+   */
+
+  /**
+   * Constructor
+   * @param disp dispatcher to call for each op encountered
+   */
+  public PreOrderWalker(Dispatcher disp) {
+    super(disp);
+  }
+
+  /**
+   * walk the current operator and its descendants
+   * @param nd current operator in the graph
+   * @throws SemanticException
+   */
+  public void walk(Node nd) throws SemanticException {
+    opStack.push(nd);
+    dispatch(nd, opStack);
+    
+    // recursively walk the children (pre-order: each node is dispatched before its children)
+    if (nd.getChildren() != null) 
+      for (Node n : nd.getChildren())
+        walk(n);
+
+    opStack.pop();
+  }
+}
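
Because the graph is a DAG, this walker dispatches a node with k parents k times, once per incoming path, each time with a different parent below it on the stack; that repeated dispatch is exactly what the union rules rely on. A self-contained sketch of the behavior (hypothetical N/WalkDemo types, not Hive's classes):

    import java.util.Arrays;
    import java.util.List;
    import java.util.Stack;

    class WalkDemo {
      static class N {
        final String name;
        final List<N> children;
        N(String name, N... children) { this.name = name; this.children = Arrays.asList(children); }
      }

      // same shape as PreOrderWalker.walk: dispatch first, then recurse into children
      static void walk(N nd, Stack<N> stack) {
        stack.push(nd);
        dispatch(nd, stack);
        for (N child : nd.children) {
          walk(child, stack);
        }
        stack.pop();
      }

      static void dispatch(N nd, Stack<N> stack) {
        N parent = (stack.size() >= 2) ? stack.get(stack.size() - 2) : null;
        System.out.println(nd.name + " reached via " + (parent == null ? "<root>" : parent.name));
      }

      public static void main(String[] args) {
        N fs = new N("FS");
        N union = new N("UNION", fs);      // UNION ends up with two parents below
        N left  = new N("TS_left",  union);
        N right = new N("TS_right", union);
        walk(left,  new Stack<N>());       // UNION reached via TS_left
        walk(right, new Stack<N>());       // UNION reached via TS_right, visited again
      }
    }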

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java Thu Mar 26 01:26:19 2009
@@ -22,6 +22,7 @@
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Stack;
 
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
@@ -54,7 +55,7 @@
    * Node Processor for Column Pruning on Filter Operators.
    */
   public static class ColumnPrunerFilterProc implements NodeProcessor {  
-    public Object process(Node nd, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException {
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException {
       FilterOperator op = (FilterOperator)nd;
       ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx)ctx;
       exprNodeDesc condn = op.getConf().getPredicate();
@@ -78,7 +79,7 @@
    * Node Processor for Column Pruning on Group By Operators.
    */
   public static class ColumnPrunerGroupByProc implements NodeProcessor {
-    public Object process(Node nd, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException {
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException {
       GroupByOperator op = (GroupByOperator)nd;
       ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx)ctx;
       List<String> colLists = new ArrayList<String>();
@@ -111,7 +112,7 @@
    * The Default Node Processor for Column Pruning.
    */
   public static class ColumnPrunerDefaultProc implements NodeProcessor {
-    public Object process(Node nd, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException {
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException {
       ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx)ctx;
       cppCtx.getPrunedColLists().put((Operator<? extends Serializable>)nd, 
           cppCtx.genColLists((Operator<? extends Serializable>)nd));
@@ -132,7 +133,7 @@
    * The Node Processor for Column Pruning on Reduce Sink Operators.
    */
   public static class ColumnPrunerReduceSinkProc implements NodeProcessor {
-    public Object process(Node nd, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException {
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException {
       ReduceSinkOperator op = (ReduceSinkOperator)nd;
       ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx)ctx;
       HashMap<Operator<? extends Serializable>, OpParseContext> opToParseCtxMap = 
@@ -191,7 +192,7 @@
    * The Node Processor for Column Pruning on Select Operators.
    */
   public static class ColumnPrunerSelectProc implements NodeProcessor {
-    public Object process(Node nd, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException {
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException {
       SelectOperator op = (SelectOperator)nd;
       ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx)ctx;
       List<String> cols = new ArrayList<String>();

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java Thu Mar 26 01:26:19 2009
@@ -20,10 +20,10 @@
 
 import java.util.List;
 import java.util.HashMap;
+import java.util.Stack;
 import java.io.Serializable;
 
 import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.plan.mapredWork;
 import org.apache.hadoop.hive.ql.lib.Node;
@@ -44,8 +44,7 @@
    * @param nd the file sink operator encountered
    * @param opProcCtx context
    */
-  public Object process(Node nd, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
-    FileSinkOperator op = (FileSinkOperator)nd;
+  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
     GenMRProcContext ctx = (GenMRProcContext)opProcCtx;
     boolean ret = false;
 

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMROperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMROperator.java?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMROperator.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMROperator.java Thu Mar 26 01:26:19 2009
@@ -20,6 +20,7 @@
 
 import java.io.Serializable;
 import java.util.Map;
+import java.util.Stack;
 
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
@@ -41,7 +42,7 @@
   * @param nd the operator encountered
    * @param procCtx context
    */
-  public Object process(Node nd, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
+  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
     GenMRProcContext ctx = (GenMRProcContext)procCtx;
 
     Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java Thu Mar 26 01:26:19 2009
@@ -22,34 +22,13 @@
 import java.util.ArrayList;
 import java.util.Map;
 import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.Set;
-import java.util.Stack;
 import java.io.Serializable;
-import java.io.File;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 
 import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
-import org.apache.hadoop.hive.ql.exec.TaskFactory;
-import org.apache.hadoop.hive.ql.exec.OperatorFactory;
-import org.apache.hadoop.hive.ql.plan.mapredWork;
-import org.apache.hadoop.hive.ql.plan.reduceSinkDesc;
-import org.apache.hadoop.hive.ql.plan.tableDesc;
-import org.apache.hadoop.hive.ql.plan.partitionDesc;
-import org.apache.hadoop.hive.ql.plan.fileSinkDesc;
-import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
-import org.apache.hadoop.hive.ql.metadata.*;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.fs.Path;
 
 /**
  * Processor Context for creating map reduce task. Walk the tree in a DFS manner and process the nodes. Some state is 
@@ -99,8 +78,9 @@
       return currAliasId;
     }
   }
-  
+
   private HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap;
+  private HashMap<UnionOperator, Task<? extends Serializable>>   unionTaskMap;
   private List<Operator<? extends Serializable>> seenOps;
 
   private ParseContext                          parseCtx;
@@ -151,6 +131,7 @@
     currAliasId     = null;
     rootOps         = new ArrayList<Operator<? extends Serializable>>();
     rootOps.addAll(parseCtx.getTopOps().values());
+    unionTaskMap = new HashMap<UnionOperator, Task<? extends Serializable>>();
   }
 
   /**
@@ -334,4 +315,12 @@
   public void setCurrAliasId(String currAliasId) {
     this.currAliasId = currAliasId;
   }
+
+  public Task<? extends Serializable> getUnionTask(UnionOperator op) {
+    return unionTaskMap.get(op);
+  }
+
+  public void setUnionTask(UnionOperator op, Task<? extends Serializable> uTask) {
+    unionTaskMap.put(op, uTask);
+  }
 }

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java Thu Mar 26 01:26:19 2009
@@ -20,6 +20,7 @@
 
 import java.util.Map;
 import java.util.HashMap;
+import java.util.Stack;
 import java.io.Serializable;
 
 import org.apache.hadoop.hive.ql.exec.Operator;
@@ -45,7 +46,7 @@
    * @param nd the reduce sink operator encountered
    * @param opProcCtx context
    */
-  public Object process(Node nd, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
+  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
     ReduceSinkOperator op = (ReduceSinkOperator)nd;
     GenMRProcContext ctx = (GenMRProcContext)opProcCtx;
 

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java Thu Mar 26 01:26:19 2009
@@ -20,6 +20,7 @@
 
 import java.io.Serializable;
 import java.util.Map;
+import java.util.Stack;
 
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
@@ -43,7 +44,7 @@
    * @param nd the reduce sink operator encountered
    * @param opProcCtx context
    */
-  public Object process(Node nd, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
+  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
     ReduceSinkOperator op = (ReduceSinkOperator)nd;
     GenMRProcContext ctx = (GenMRProcContext)opProcCtx;
 

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java Thu Mar 26 01:26:19 2009
@@ -20,6 +20,7 @@
 
 import java.io.Serializable;
 import java.util.Map;
+import java.util.Stack;
 
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
@@ -44,7 +45,7 @@
   * @param nd the table scan operator encountered
    * @param opProcCtx context
    */
-  public Object process(Node nd, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
+  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
     TableScanOperator op = (TableScanOperator)nd;
     GenMRProcContext ctx = (GenMRProcContext)opProcCtx;
     ParseContext parseCtx = ctx.getParseCtx();

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java?rev=758494&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java Thu Mar 26 01:26:19 2009
@@ -0,0 +1,164 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Stack;
+import java.io.Serializable;
+import java.io.File;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.plan.mapredWork;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.plan.tableDesc;
+import org.apache.hadoop.hive.ql.plan.partitionDesc;
+import org.apache.hadoop.hive.ql.plan.PlanUtils;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.plan.fileSinkDesc;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
+import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcFactory;
+import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext.UnionParseContext;
+
+/**
+ * Processor for the rule - any operator tree followed by union
+ */
+public class GenMRUnion1 implements NodeProcessor {
+
+  public GenMRUnion1() {
+  }
+
+  /**
+   * Union Operator encountered.
+   * Currently, the algorithm is pretty simple:
+   *   If all the sub-queries are map-only, don't do anything.
+   *   Otherwise, insert a FileSink on top of all the sub-queries.
+   *
+   * This can be optimized later on.
+   * @param nd the union operator encountered
+   * @param opProcCtx context
+   */
+  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
+    UnionOperator union = (UnionOperator)nd;
+    GenMRProcContext ctx = (GenMRProcContext)opProcCtx;
+    ParseContext parseCtx = ctx.getParseCtx();
+    UnionParseContext uPrsCtx = parseCtx.getUCtx().getUnionParseContext(union);
+    assert uPrsCtx != null;
+
+    // The plan needs to be broken only if one of the sub-queries involves a map-reduce job
+    int numInputs = uPrsCtx.getNumInputs();
+    boolean mapOnly = true;
+    int pos = 0;
+    for (pos = 0; pos < numInputs; pos++) {
+      if (!uPrsCtx.getMapOnlySubq(pos)) {
+        mapOnly = false;
+        break;
+      }
+    }
+
+    // Map-only subqueries can be optimized in the future to not write to a file
+    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();
+
+    if (mapOnly) {
+      mapCurrCtx.put((Operator<? extends Serializable>)nd, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), ctx.getCurrAliasId()));
+      return null;
+    }
+
+    Task<? extends Serializable> currTask = ctx.getCurrTask();
+    pos = UnionProcFactory.getPositionParent(union, stack);
+
+    // is the current task a root task
+    if (uPrsCtx.getRootTask(pos) && (!ctx.getRootTasks().contains(currTask)))
+      ctx.getRootTasks().add(currTask);
+    
+    Task<? extends Serializable> uTask = ctx.getUnionTask(union);
+
+    Operator<? extends Serializable> parent = union.getParentOperators().get(pos);   
+    mapredWork uPlan = null;
+
+    // union is encountered for the first time
+    if (uTask == null) {
+      uPlan = GenMapRedUtils.getMapRedWork();
+      uTask = TaskFactory.get(uPlan, parseCtx.getConf());
+      ctx.setUnionTask(union, uTask);
+    }
+    else 
+      uPlan = (mapredWork)uTask.getWork();
+    
+    tableDesc tt_desc = 
+      PlanUtils.getBinaryTableDesc(PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol")); 
+    
+    // generate the temporary file
+    String scratchDir = ctx.getScratchDir();
+    int randomid = ctx.getRandomId();
+    int pathid   = ctx.getPathId();
+    
+    String taskTmpDir = (new Path(scratchDir + File.separator + randomid + '.' + pathid)).toString();
+    
+    pathid++;
+    ctx.setPathId(pathid);
+    
+    // Add the path to alias mapping
+    assert uPlan.getPathToAliases().get(taskTmpDir) == null;
+    uPlan.getPathToAliases().put(taskTmpDir, new ArrayList<String>());
+    uPlan.getPathToAliases().get(taskTmpDir).add(taskTmpDir);
+    uPlan.getPathToPartitionInfo().put(taskTmpDir, new partitionDesc(tt_desc, null));
+    uPlan.getAliasToWork().put(taskTmpDir, union);
+    GenMapRedUtils.setKeyAndValueDesc(uPlan, union);
+    
+    // Create a file sink operator for this file name
+    Operator<? extends Serializable> fs_op =
+      OperatorFactory.get
+      (new fileSinkDesc(taskTmpDir, tt_desc,
+                        parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE)),
+       parent.getSchema());
+    
+    assert parent.getChildOperators().size() == 1;
+    parent.getChildOperators().set(0, fs_op);
+
+    List<Operator<? extends Serializable>> parentOpList = new ArrayList<Operator<? extends Serializable>>();
+    parentOpList.add(parent);
+    fs_op.setParentOperators(parentOpList);
+
+    currTask.addDependentTask(uTask);
+
+    // If it is map-only task, add the files to be processed
+    if (uPrsCtx.getMapOnlySubq(pos) && uPrsCtx.getRootTask(pos))
+      GenMapRedUtils.setTaskPlan(ctx.getCurrAliasId(), ctx.getCurrTopOp(), (mapredWork) currTask.getWork(), false, ctx);
+
+    ctx.setCurrTask(uTask);
+    ctx.setCurrAliasId(null);
+    ctx.setCurrTopOp(null);
+
+    mapCurrCtx.put((Operator<? extends Serializable>)nd, new GenMapRedCtx(ctx.getCurrTask(), null, null));
+    
+    return null;
+  }
+}
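
To make the plan-breaking decision concrete, here is a hedged, self-contained sketch of the algorithm described in the comment above, using hypothetical stand-in types (Subquery, planUnion) rather than Hive's task and plan classes:

    import java.util.Arrays;
    import java.util.List;

    class UnionPlanDemo {
      static class Subquery {
        final String name;
        final boolean mapOnly;
        Subquery(String name, boolean mapOnly) { this.name = name; this.mapOnly = mapOnly; }
      }

      static void planUnion(List<Subquery> inputs) {
        // the plan is broken only if some branch needs a map-reduce job
        boolean allMapOnly = inputs.stream().allMatch(s -> s.mapOnly);
        if (allMapOnly) {
          System.out.println("all branches map-only: keep the union in a single task");
          return;
        }
        for (Subquery s : inputs) {
          System.out.println(s.name + ": terminate with a FileSink; the union task reads the tmp file");
        }
      }

      public static void main(String[] args) {
        planUnion(Arrays.asList(new Subquery("subq1", true),
                                new Subquery("subq2", false)));
      }
    }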

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java Thu Mar 26 01:26:19 2009
@@ -233,7 +233,7 @@
    * @param plan     current plan
    * @param topOp    current top operator in the path
    */
-  private static void setKeyAndValueDesc(mapredWork plan, Operator<? extends Serializable> topOp) {
+  public static void setKeyAndValueDesc(mapredWork plan, Operator<? extends Serializable> topOp) {
     if (topOp instanceof ReduceSinkOperator) {
       ReduceSinkOperator rs = (ReduceSinkOperator)topOp;
       plan.setKeyDesc(rs.getConf().getKeySerializeInfo());

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java Thu Mar 26 01:26:19 2009
@@ -23,6 +23,7 @@
 
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcessor;
 
 /**
  * Implementation of the optimizer
@@ -43,6 +44,7 @@
 	public void initialize() {
 		transformations = new ArrayList<Transform>();
 		transformations.add(new ColumnPruner());
+		transformations.add(new UnionProcessor());
 	}
 	
 	/**

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcContext.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcContext.java?rev=758494&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcContext.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcContext.java Thu Mar 26 01:26:19 2009
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.unionproc;
+
+import java.util.Map;
+import java.util.HashMap;
+
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
+
+public class UnionProcContext implements NodeProcessorCtx {
+
+  public static class UnionParseContext {
+    transient private boolean[] mapOnlySubq;
+    transient private boolean[] rootTask;
+    
+    // currently, union has 2 inputs, but that should change - merging of unions should be allowed
+    transient private int numInputs = 2;
+
+    public UnionParseContext() {
+      mapOnlySubq = new boolean[numInputs];
+      rootTask    = new boolean[numInputs];
+    }
+    
+    public boolean getMapOnlySubq(int pos) {
+      return mapOnlySubq[pos];
+    }
+    
+    public void setMapOnlySubq(int pos, boolean mapOnlySubq) {
+      this.mapOnlySubq[pos] = mapOnlySubq;
+    }
+    
+    public boolean getRootTask(int pos) {
+      return rootTask[pos];
+    }
+    
+    public void setRootTask(int pos, boolean rootTask) {
+      this.rootTask[pos] = rootTask;
+    }
+    
+    public int getNumInputs() {
+      return numInputs;
+    }
+    
+    public void setNumInputs(int numInputs) {
+      this.numInputs = numInputs;
+    }
+  }
+   
+  private Map<UnionOperator, UnionParseContext> uCtxMap;
+
+  public UnionProcContext() {
+    uCtxMap = new HashMap<UnionOperator, UnionParseContext>();
+  }
+
+  public void setUnionParseContext(UnionOperator u, UnionParseContext uCtx) { 
+    uCtxMap.put(u, uCtx);
+  }
+
+  public UnionParseContext getUnionParseContext(UnionOperator u) { 
+    return uCtxMap.get(u);
+  }
+
+}

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java?rev=758494&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java Thu Mar 26 01:26:19 2009
@@ -0,0 +1,148 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.unionproc;
+
+import java.io.Serializable;
+import java.util.List;
+import java.util.Stack;
+
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext.UnionParseContext;
+
+/**
+ * Node processor factory for union processing
+ */
+public class UnionProcFactory {
+
+  public static int getPositionParent(UnionOperator union, Stack<Node> stack) {
+    int pos = 0;
+    int size = stack.size();
+    assert size >= 2 && stack.get(size - 1) == union;
+    Operator<? extends Serializable> parent = (Operator<? extends Serializable>)stack.get(size - 2);
+    List<Operator<? extends Serializable>> parUnion = union.getParentOperators();
+    pos = parUnion.indexOf(parent);
+    assert pos < parUnion.size(); 
+    return pos;
+  }
+  
+  /**
+   * MapRed subquery followed by Union
+   */
+  public static class MapRedUnion implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      UnionOperator union = (UnionOperator)nd;
+      UnionProcContext ctx = (UnionProcContext) procCtx;
+
+      // find the branch on which this processor was invoked
+      int pos = getPositionParent(union, stack);
+      UnionParseContext uCtx = ctx.getUnionParseContext(union);
+      if (uCtx == null)
+        uCtx = new UnionParseContext();
+
+      uCtx.setMapOnlySubq(pos, false);
+      uCtx.setRootTask(pos, false);
+      ctx.setUnionParseContext(union, uCtx); 
+      return null;
+    }
+  }
+
+  /**
+   * Map-only subquery followed by Union
+   */
+  public static class MapUnion implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      UnionOperator union = (UnionOperator)nd;
+      UnionProcContext ctx = (UnionProcContext) procCtx;
+
+      // find the branch on which this processor was invoked
+      int pos = getPositionParent(union, stack);
+      UnionParseContext uCtx = ctx.getUnionParseContext(union);
+      if (uCtx == null)
+        uCtx = new UnionParseContext();
+
+      uCtx.setMapOnlySubq(pos, true);
+      uCtx.setRootTask(pos, true);
+      ctx.setUnionParseContext(union, uCtx); 
+      return null;
+    }
+  }
+
+  /**
+   * Union subquery followed by Union
+   */
+  public static class UnknownUnion implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      UnionOperator union = (UnionOperator)nd;
+      UnionProcContext ctx = (UnionProcContext) procCtx;
+
+      // find the branch on which this processor was invoked
+      int pos = getPositionParent(union, stack);
+      UnionParseContext uCtx = ctx.getUnionParseContext(union);
+      if (uCtx == null)
+        uCtx = new UnionParseContext();
+
+      uCtx.setMapOnlySubq(pos, true);
+      uCtx.setRootTask(pos, false);
+      ctx.setUnionParseContext(union, uCtx); 
+      return null;
+    }
+  }
+  
+  /**
+   * Default processor
+   */
+  public static class NoUnion implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      return null;
+    }
+  }
+
+  public static NodeProcessor getMapRedUnion() {
+    return new MapRedUnion();
+  }
+
+  public static NodeProcessor getMapUnion() {
+    return new MapUnion();
+  }
+
+  public static NodeProcessor getUnknownUnion() {
+    return new UnknownUnion();
+  }
+  
+  public static NodeProcessor getNoUnion() {
+    return new NoUnion();
+  }
+
+}

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java?rev=758494&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java Thu Mar 26 01:26:19 2009
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.unionproc;
+
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import org.apache.hadoop.hive.ql.lib.PreOrderWalker;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.optimizer.Transform;
+
+/**
+ * Implementation of the union processor. This can be enhanced later on.
+ * Currently, it does the following:
+ *   Identify if both the subqueries of UNION are map-only. 
+ *   Store that fact in the unionDesc/UnionOperator. 
+ *   If either of the sub-queries involves a map-reduce job, a FileSink is introduced on top of the UNION.
+ *   This can be later optimized to clone all the operators above the UNION.
+ *
+ * The ParseContext is not changed.
+ */
+public class UnionProcessor implements Transform {
+
+  /**
+   * empty constructor
+   */
+  public UnionProcessor() { }
+
+  /**
+   * Transform the query tree. For each union, store the fact whether both the 
+   * sub-queries are map-only
+   * @param pCtx the current parse context
+   */
+  public ParseContext transform(ParseContext pCtx) throws SemanticException {
+    // create a walker which walks the tree in a DFS manner while maintaining the operator stack.
+    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+    opRules.put(new RuleRegExp(new String("R1"), "RS%.*UNION%"), UnionProcFactory.getMapRedUnion());
+    opRules.put(new RuleRegExp(new String("R2"), "UNION%.*UNION%"), UnionProcFactory.getUnknownUnion());
+    opRules.put(new RuleRegExp(new String("R3"), "TS%.*UNION%"), UnionProcFactory.getMapUnion());
+
+    // The dispatcher fires the processor for the matching rule and passes the context along
+    UnionProcContext uCtx = new UnionProcContext();
+    Dispatcher disp = new DefaultRuleDispatcher(UnionProcFactory.getNoUnion(), opRules, uCtx);
+    GraphWalker ogw = new PreOrderWalker(disp);
+   
+    // Create a list of topop nodes
+    ArrayList<Node> topNodes = new ArrayList<Node>();
+    topNodes.addAll(pCtx.getTopOps().values());
+    ogw.startWalking(topNodes, null);
+    pCtx.setUCtx(uCtx);
+    
+    return pCtx;
+  }
+}
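
The rules above are matched against the names of the operators on the walker's stack; roughly, '%' terminates each operator name and '.*' is an ordinary wildcard. A standalone illustration using java.util.regex (this is a sketch of the idea, not Hive's RuleRegExp implementation; the dispatcher additionally picks the closest matching rule when several match):

    import java.util.regex.Pattern;

    class RuleDemo {
      static boolean matches(String rulePattern, String stackPath) {
        // '%' is treated as a literal terminator of each operator name here;
        // '.*' is an ordinary regex wildcard, as in the rules above
        return Pattern.compile(rulePattern).matcher(stackPath).find();
      }

      public static void main(String[] args) {
        // a map-only branch reaching the union: TS -> SEL -> UNION
        String mapOnlyPath = "TS%SEL%UNION%";
        // a map-reduce branch: TS -> RS -> GBY -> UNION
        String mapRedPath = "TS%RS%GBY%UNION%";

        System.out.println(matches("TS%.*UNION%", mapOnlyPath));  // true  -> MapUnion
        System.out.println(matches("RS%.*UNION%", mapOnlyPath));  // false
        System.out.println(matches("RS%.*UNION%", mapRedPath));   // true  -> MapRedUnion
      }
    }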

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java Thu Mar 26 01:26:19 2009
@@ -27,6 +27,7 @@
 import org.apache.hadoop.hive.ql.plan.loadTableDesc;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
 
 /**
  * Parse Context: The current parse context. This is passed to the optimizer
@@ -52,7 +53,8 @@
   private HiveConf conf;
   private HashMap<String, String> idToTableNameMap;
   private int destTableId;
-
+  private UnionProcContext uCtx;
+  
   /**
    * @param qb
    *          current QB
@@ -81,7 +83,7 @@
       HashMap<String, Operator<? extends Serializable>> topSelOps,
       HashMap<Operator<? extends Serializable>, OpParseContext> opParseCtx,
       List<loadTableDesc> loadTableWork, List<loadFileDesc> loadFileWork,
-      Context ctx, HashMap<String, String> idToTableNameMap, int destTableId) {
+      Context ctx, HashMap<String, String> idToTableNameMap, int destTableId, UnionProcContext uCtx) {
     this.conf = conf;
     this.qb = qb;
     this.ast = ast;
@@ -95,6 +97,7 @@
     this.ctx = ctx;
     this.idToTableNameMap = idToTableNameMap;
     this.destTableId = destTableId;
+    this.uCtx = uCtx;
   }
 
   /**
@@ -281,5 +284,12 @@
     this.destTableId = destTableId;
   }
   
-  
+  public UnionProcContext getUCtx() {
+    return uCtx;
+  }
+
+  public void setUCtx(UnionProcContext uCtx) {
+    this.uCtx = uCtx;
+  }
+
 }

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/PrintOpTreeProcessor.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/PrintOpTreeProcessor.java?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/PrintOpTreeProcessor.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/PrintOpTreeProcessor.java Thu Mar 26 01:26:19 2009
@@ -21,6 +21,8 @@
 import java.io.PrintStream;
 import java.io.Serializable;
 import java.util.HashMap;
+import java.util.Stack;
+
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
@@ -70,7 +72,7 @@
     return ret.toString();
   }
   
-  public Object process(Node nd, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException {
+  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException {
     Operator<? extends Serializable> op = (Operator<? extends Serializable>)nd;
     if (opMap.get(op) == null) {
       opMap.put(op, curNum++);

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Thu Mar 26 01:26:19 2009
@@ -49,9 +49,11 @@
 import org.apache.hadoop.hive.ql.optimizer.Optimizer;
 import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext;
 import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
+import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
 import org.apache.hadoop.hive.ql.optimizer.GenMROperator;
 import org.apache.hadoop.hive.ql.optimizer.GenMRTableScan1;
 import org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1;
+import org.apache.hadoop.hive.ql.optimizer.GenMRUnion1;
 import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink1;
 import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink2;
 import org.apache.hadoop.hive.ql.plan.*;
@@ -82,6 +84,7 @@
   private QB qb;
   private ASTNode ast;
   private int destTableId;
+  private UnionProcContext uCtx;
 
   private static class Phase1Ctx {
     String dest;
@@ -100,8 +103,7 @@
     this.loadFileWork = new ArrayList<loadFileDesc>();
     opParseCtx = new HashMap<Operator<? extends Serializable>, OpParseContext>();
     this.destTableId = 1;
- 
-    
+    this.uCtx = null;
   }
   
 
@@ -117,6 +119,7 @@
     this.idToTableNameMap.clear();
     qb = null;
     ast = null;
+    uCtx = null;
   }
 
   public void init(ParseContext pctx) {
@@ -130,11 +133,12 @@
     ctx = pctx.getContext();
     destTableId = pctx.getDestTableId();
     idToTableNameMap = pctx.getIdToTableNameMap();
+    this.uCtx = pctx.getUCtx();
   }
 
   public ParseContext getParseContext() {
     return new ParseContext(conf, qb, ast, aliasToPruner, aliasToSamplePruner, topOps, 
-                            topSelOps, opParseCtx, loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId);
+                            topSelOps, opParseCtx, loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx);
   }
   
   @SuppressWarnings("nls")
@@ -2885,6 +2889,8 @@
       Operator leftOp, String rightalias, Operator rightOp)
       throws SemanticException {
 
+    // Currently, the unions are not merged - each union has exactly 2 parents. So an n-way union
+    // will lead to (n-1) union operators. These could easily be merged into a single union later.
     RowResolver leftRR = opParseCtx.get(leftOp).getRR();
     RowResolver rightRR = opParseCtx.get(rightOp).getRR();
     HashMap<String, ColumnInfo> leftmap = leftRR.getFieldMap(leftalias);
@@ -2920,18 +2926,23 @@
       ColumnInfo lInfo = lEntry.getValue();
       unionoutRR.put(unionalias, field, lInfo);
     }
-    Operator<? extends Serializable> unionforward = OperatorFactory.get(forwardDesc.class,
-        new RowSchema(unionoutRR.getColumnInfos()));
+    Operator<? extends Serializable> unionforward = 
+      OperatorFactory.getAndMakeChild(new unionDesc(), new RowSchema(unionoutRR.getColumnInfos()));
+
     // set forward operator as child of each of leftOp and rightOp
     List<Operator<? extends Serializable>> child = new ArrayList<Operator<? extends Serializable>>();
     child.add(unionforward);
     rightOp.setChildOperators(child);
+
+    child = new ArrayList<Operator<? extends Serializable>>();
+    child.add(unionforward);
     leftOp.setChildOperators(child);
+
     List<Operator<? extends Serializable>> parent = new ArrayList<Operator<? extends Serializable>>();
     parent.add(leftOp);
     parent.add(rightOp);
     unionforward.setParentOperators(parent);
-
+    
     // create operator info list to return
     return putOpInsertMap(unionforward, unionoutRR);
   }
@@ -3286,13 +3297,14 @@
         getParseContext(), mvTask, this.rootTasks, this.scratchDir, this.randomid, this.pathid,
         new HashMap<Operator<? extends Serializable>, GenMapRedCtx>());
 
-    // create a walker which walks the tree in a DFS manner while maintaining the operator stack. The dispatcher
-    // generates the plan from the operator tree
+    // create a walker which walks the tree in a DFS manner while maintaining the operator stack. 
+    // The dispatcher generates the plan from the operator tree
     Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
     opRules.put(new RuleRegExp(new String("R1"), "TS%"), new GenMRTableScan1());
     opRules.put(new RuleRegExp(new String("R2"), "TS%.*RS%"), new GenMRRedSink1());
     opRules.put(new RuleRegExp(new String("R3"), "RS%.*RS%"), new GenMRRedSink2());
     opRules.put(new RuleRegExp(new String("R4"), "FS%"), new GenMRFileSink1());
+    opRules.put(new RuleRegExp(new String("R4"), "UNION%"), new GenMRUnion1());
 
     // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along
     Dispatcher disp = new DefaultRuleDispatcher(new GenMROperator(), opRules, procCtx);
@@ -3354,7 +3366,7 @@
     
 
     ParseContext pCtx = new ParseContext(conf, qb, ast, aliasToPruner, aliasToSamplePruner, topOps, 
-    		                                 topSelOps, opParseCtx, loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId);
+    		                                 topSelOps, opParseCtx, loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx);
   
     Optimizer optm = new Optimizer();
     optm.setPctx(pCtx);
@@ -3370,9 +3382,6 @@
     // Do any sample pruning
     genSamplePruners(qb);
     LOG.info("Completed sample pruning");
-    
-    // TODO - this can be extended to create multiple
-    // map reduce plans later
 
     // At this point we have the complete operator tree
     // from which we want to find the reduce operator

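The genUnionPlan hunk above also fixes a subtle aliasing bug: previously a
single child list was installed on both leftOp and rightOp, so a later
mutation through one parent's child list would silently appear under the
other parent as well. The patch builds a fresh list per parent. A minimal,
self-contained sketch of the hazard (Op below is an illustrative stand-in,
not Hive's Operator class):

    import java.util.ArrayList;
    import java.util.List;

    class Op {
      final String name;
      List<Op> children;
      Op(String name) { this.name = name; }
      void setChildOperators(List<Op> c) { this.children = c; }
    }

    public class UnionWiring {
      public static void main(String[] args) {
        Op left = new Op("left"), right = new Op("right");
        Op union = new Op("union");

        // Buggy wiring: one shared list for both parents.
        List<Op> shared = new ArrayList<Op>();
        shared.add(union);
        left.setChildOperators(shared);
        right.setChildOperators(shared);
        left.children.add(new Op("extra"));          // e.g. an optimizer splice
        System.out.println(right.children.size());   // prints 2 -- aliased!

        // Fixed wiring, as in the patch: a fresh list per parent.
        List<Op> l = new ArrayList<Op>(); l.add(union);
        left.setChildOperators(l);
        List<Op> r = new ArrayList<Op>(); r.add(union);
        right.setChildOperators(r);
        left.children.add(new Op("extra"));
        System.out.println(right.children.size());   // prints 1 -- independent
      }
    }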
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java Thu Mar 26 01:26:19 2009
@@ -24,6 +24,7 @@
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Stack;
 
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
@@ -91,7 +92,7 @@
   public static class NullExprProcessor implements NodeProcessor {
 
     @Override
-    public Object process(Node nd, NodeProcessorCtx procCtx,
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         Object... nodeOutputs) throws SemanticException {
 
       exprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx);
@@ -118,7 +119,7 @@
   public static class NumExprProcessor implements NodeProcessor {
 
     @Override
-    public Object process(Node nd, NodeProcessorCtx procCtx,
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         Object... nodeOutputs) throws SemanticException {
 
       exprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx);
@@ -160,7 +161,7 @@
   public static class StrExprProcessor implements NodeProcessor {
 
     @Override
-    public Object process(Node nd, NodeProcessorCtx procCtx,
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         Object... nodeOutputs) throws SemanticException {
 
       exprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx);
@@ -202,7 +203,7 @@
   public static class BoolExprProcessor implements NodeProcessor {
 
     @Override
-    public Object process(Node nd, NodeProcessorCtx procCtx,
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         Object... nodeOutputs) throws SemanticException {
 
       exprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx);
@@ -242,7 +243,7 @@
   public static class ColumnExprProcessor implements NodeProcessor {
 
     @Override
-    public Object process(Node nd, NodeProcessorCtx procCtx,
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         Object... nodeOutputs) throws SemanticException {
 
       exprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx);
@@ -581,7 +582,7 @@
     }
 
     @Override
-    public Object process(Node nd, NodeProcessorCtx procCtx,
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         Object... nodeOutputs) throws SemanticException {
 
       ASTNode expr = (ASTNode)nd;

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/unionDesc.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/unionDesc.java?rev=758494&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/unionDesc.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/unionDesc.java Thu Mar 26 01:26:19 2009
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.io.Serializable;
+
+/**
+ * unionDesc is currently an empty class.
+ * However, a union has more than one input (unlike forward), so it needs a descriptor class of its own.
+ */
+@explain(displayName="Union")
+public class unionDesc implements Serializable {
+  private static final long serialVersionUID = 1L;
+
+  @SuppressWarnings("nls")
+  public unionDesc() { }
+}

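A note on why an empty descriptor still earns its own class: OperatorFactory
dispatches on the descriptor's runtime class to pick which operator to
instantiate, so a distinct unionDesc is what makes
OperatorFactory.getAndMakeChild(new unionDesc(), ...) produce a
UnionOperator rather than a forward operator. A rough sketch of that
dispatch pattern (the map and method names below are illustrative
stand-ins, not Hive's actual API):

    import java.io.Serializable;
    import java.util.HashMap;
    import java.util.Map;

    public class FactorySketch {
      // descriptor class -> operator class
      private static final Map<Class<?>, Class<?>> opvec =
          new HashMap<Class<?>, Class<?>>();
      static {
        opvec.put(UnionDescSketch.class, UnionOpSketch.class);
      }

      static Object makeOperator(Serializable desc) throws Exception {
        Class<?> opClass = opvec.get(desc.getClass());
        if (opClass == null)
          throw new IllegalArgumentException("no operator registered for "
                                             + desc.getClass().getName());
        return opClass.newInstance();   // caller sets the desc on it next
      }

      public static void main(String[] args) throws Exception {
        System.out.println(makeOperator(new UnionDescSketch()).getClass());
      }
    }

    class UnionDescSketch implements Serializable {
      private static final long serialVersionUID = 1L;
    }

    class UnionOpSketch { }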
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java Thu Mar 26 01:26:19 2009
@@ -23,6 +23,7 @@
 import java.util.ArrayList;
 import java.util.LinkedHashMap;
 import java.util.Map;
+import java.util.Stack;
 import java.util.TreeSet;
 
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
@@ -76,7 +77,7 @@
   /**
    * Implements the process method for the NodeProcessor interface.
    */
-  public Object process(Node nd, NodeProcessorCtx procCtx, Object... nodeOutputs)
+  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
   throws SemanticException {
     ASTNode pt = (ASTNode)nd;
 

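The TypeCheckProcFactory and LineageInfo hunks above follow from one
interface change: NodeProcessor.process() now also receives the walker's
stack of nodes, so a processor can see the path by which a node was
reached - the same stack that rules such as "TS%.*RS%" are matched
against. A pared-down mirror of the revised contract (a sketch under that
assumption; these interfaces are simplified, not the Hive originals):

    import java.util.Stack;

    interface Node { String getName(); }
    interface NodeProcessorCtx { }

    interface NodeProcessor {
      Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
                     Object... nodeOutputs) throws Exception;
    }

    class PathPrintingProcessor implements NodeProcessor {
      public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
                            Object... nodeOutputs) {
        // A java.util.Stack iterates bottom-to-top, i.e. root first,
        // so this prints the ancestry leading down to nd.
        StringBuilder path = new StringBuilder();
        for (Node n : stack) {
          path.append(n.getName()).append('/');
        }
        System.out.println("visited " + nd.getName() + " via " + path);
        return null;
      }
    }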
Modified: hadoop/hive/trunk/ql/src/test/queries/clientpositive/union.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/union.q?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/union.q (original)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/union.q Thu Mar 26 01:26:19 2009
@@ -1,3 +1,5 @@
+-- union case: both subqueries are map jobs on the same input, followed by filesink
+
 EXPLAIN
 FROM (
   FROM src select src.key, src.value WHERE src.key < 100

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/union10.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/union10.q?rev=758494&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/union10.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/union10.q Thu Mar 26 01:26:19 2009
@@ -0,0 +1,21 @@
+set hive.map.aggr = true;
+
+-- union case: all subqueries are map-reduce jobs, 3-way union, same input for all sub-queries, followed by filesink
+
+explain 
+  select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
+                                        UNION  ALL  
+                                            select 'tst2' as key, count(1) as value from src s2
+                                        UNION ALL
+                                            select 'tst3' as key, count(1) as value from src s3) unionsrc;
+
+
+
+  select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
+                                        UNION  ALL  
+                                            select 'tst2' as key, count(1) as value from src s2
+                                        UNION ALL
+                                            select 'tst3' as key, count(1) as value from src s3) unionsrc;
+
+
+

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/union11.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/union11.q?rev=758494&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/union11.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/union11.q Thu Mar 26 01:26:19 2009
@@ -0,0 +1,20 @@
+set hive.map.aggr = true;
+
+-- union case: all subqueries are map-reduce jobs, 3-way union, same input for all sub-queries, followed by reducesink
+
+explain 
+  select unionsrc.key, count(1) FROM (select 'tst1' as key, count(1) as value from src s1
+                                        UNION  ALL  
+                                            select 'tst2' as key, count(1) as value from src s2
+                                        UNION ALL
+                                            select 'tst3' as key, count(1) as value from src s3) unionsrc group by unionsrc.key;
+
+
+  select unionsrc.key, count(1) FROM (select 'tst1' as key, count(1) as value from src s1
+                                        UNION  ALL  
+                                            select 'tst2' as key, count(1) as value from src s2
+                                        UNION ALL
+                                            select 'tst3' as key, count(1) as value from src s3) unionsrc group by unionsrc.key;
+
+
+

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/union12.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/union12.q?rev=758494&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/union12.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/union12.q Thu Mar 26 01:26:19 2009
@@ -0,0 +1,21 @@
+set hive.map.aggr = true;
+
+-- union case: all subqueries are map-reduce jobs, 3-way union, different inputs for all sub-queries, followed by filesink
+
+explain 
+  select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
+                                        UNION  ALL  
+                                            select 'tst2' as key, count(1) as value from src1 s2
+                                        UNION ALL
+                                            select 'tst3' as key, count(1) as value from srcbucket s3) unionsrc;
+
+
+
+  select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
+                                        UNION  ALL  
+                                            select 'tst2' as key, count(1) as value from src1 s2
+                                        UNION ALL
+                                            select 'tst3' as key, count(1) as value from srcbucket s3) unionsrc;
+
+
+

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/union13.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/union13.q?rev=758494&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/union13.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/union13.q Thu Mar 26 01:26:19 2009
@@ -0,0 +1,8 @@
+-- union case: both subqueries are map-only jobs, same input, followed by filesink
+
+explain 
+  select unionsrc.key, unionsrc.value FROM (select s1.key as key, s1.value as value from src s1 UNION  ALL  
+                                            select s2.key as key, s2.value as value from src s2) unionsrc;
+
+select unionsrc.key, unionsrc.value FROM (select s1.key as key, s1.value as value from src s1 UNION  ALL  
+                                          select s2.key as key, s2.value as value from src s2) unionsrc;

Modified: hadoop/hive/trunk/ql/src/test/queries/clientpositive/union2.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/union2.q?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/union2.q (original)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/union2.q Thu Mar 26 01:26:19 2009
@@ -1,3 +1,5 @@
+-- union case: both subqueries are map-reduce jobs on the same input, followed by reducesink
+
 explain 
   select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION  ALL  
                         select s2.key as key, s2.value as value from src s2) unionsrc;

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/union4.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/union4.q?rev=758494&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/union4.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/union4.q Thu Mar 26 01:26:19 2009
@@ -0,0 +1,12 @@
+set hive.map.aggr = true;
+
+-- union case: both subqueries are map-reduce jobs on the same input, followed by filesink
+
+explain 
+  select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
+                                        UNION  ALL  
+                                            select 'tst2' as key, count(1) as value from src s2) unionsrc;
+
+select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
+                                        UNION  ALL  
+                                          select 'tst2' as key, count(1) as value from src s2) unionsrc;

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/union5.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/union5.q?rev=758494&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/union5.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/union5.q Thu Mar 26 01:26:19 2009
@@ -0,0 +1,12 @@
+set hive.map.aggr = true;
+
+-- union case: both subqueries are map-reduce jobs on the same input, followed by reducesink
+
+explain 
+  select unionsrc.key, count(1) FROM (select 'tst1' as key, count(1) as value from src s1
+                                    UNION  ALL  
+                                      select 'tst2' as key, count(1) as value from src s2) unionsrc group by unionsrc.key;
+
+select unionsrc.key, count(1) FROM (select 'tst1' as key, count(1) as value from src s1
+                                  UNION  ALL  
+                                    select 'tst2' as key, count(1) as value from src s2) unionsrc group by unionsrc.key;

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/union6.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/union6.q?rev=758494&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/union6.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/union6.q Thu Mar 26 01:26:19 2009
@@ -0,0 +1,14 @@
+set hive.map.aggr = true;
+
+-- union case: one subquery is a map-reduce job, different inputs for the sub-queries, followed by filesink
+
+explain 
+  select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
+                                        UNION  ALL  
+                                            select s2.key as key, s2.value as value from src1 s2) unionsrc;
+
+
+select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
+                                      UNION  ALL  
+                                          select s2.key as key, s2.value as value from src1 s2) unionsrc;
+

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/union7.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/union7.q?rev=758494&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/union7.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/union7.q Thu Mar 26 01:26:19 2009
@@ -0,0 +1,14 @@
+set hive.map.aggr = true;
+
+-- union case: one subquery is a map-reduce job, different inputs for the sub-queries, followed by reducesink
+
+explain 
+  select unionsrc.key, count(1) FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
+                                        UNION  ALL  
+                                            select s2.key as key, s2.value as value from src1 s2) unionsrc group by unionsrc.key;
+
+select unionsrc.key, count(1) FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
+                                      UNION  ALL  
+                                    select s2.key as key, s2.value as value from src1 s2) unionsrc group by unionsrc.key;
+
+

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/union8.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/union8.q?rev=758494&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/union8.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/union8.q Thu Mar 26 01:26:19 2009
@@ -0,0 +1,10 @@
+-- union case: all subqueries are map-only jobs, 3-way union, same input for all sub-queries, followed by filesink
+
+explain 
+  select unionsrc.key, unionsrc.value FROM (select s1.key as key, s1.value as value from src s1 UNION  ALL  
+                                            select s2.key as key, s2.value as value from src s2 UNION  ALL  
+                                            select s3.key as key, s3.value as value from src s3) unionsrc;
+
+select unionsrc.key, unionsrc.value FROM (select s1.key as key, s1.value as value from src s1 UNION  ALL  
+                                          select s2.key as key, s2.value as value from src s2 UNION  ALL  
+                                          select s3.key as key, s3.value as value from src s3) unionsrc;

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/union9.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/union9.q?rev=758494&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/union9.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/union9.q Thu Mar 26 01:26:19 2009
@@ -0,0 +1,10 @@
+-- union case: all subqueries are map-only jobs, 3-way union, same input for all sub-queries, followed by reducesink
+
+explain 
+  select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION  ALL  
+                        select s2.key as key, s2.value as value from src s2 UNION ALL
+                        select s3.key as key, s3.value as value from src s3) unionsrc;
+
+  select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION  ALL  
+                        select s2.key as key, s2.value as value from src s2 UNION ALL
+                        select s3.key as key, s3.value as value from src s3) unionsrc;

Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/cluster.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/cluster.q.out?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/cluster.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/cluster.q.out Thu Mar 26 01:26:19 2009
@@ -486,7 +486,7 @@
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        /data/users/suresh/hive_external/build/ql/tmp/567040252/108440248.10002 
+        /data/users/njain/hive2/hive/build/ql/tmp/596968299/28368921.10002 
           Reduce Output Operator
             key expressions:
                   expr: 1
@@ -592,7 +592,7 @@
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        /data/users/suresh/hive_external/build/ql/tmp/124737534/554653212.10002 
+        /data/users/njain/hive2/hive/build/ql/tmp/12416576/480017656.10002 
           Reduce Output Operator
             key expressions:
                   expr: 1
@@ -700,7 +700,7 @@
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        /data/users/suresh/hive_external/build/ql/tmp/247667797/531125383.10002 
+        /data/users/njain/hive2/hive/build/ql/tmp/15199144/1962713711.10002 
           Reduce Output Operator
             key expressions:
                   expr: 0
@@ -808,7 +808,7 @@
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        /data/users/suresh/hive_external/build/ql/tmp/94432770/53306218.10002 
+        /data/users/njain/hive2/hive/build/ql/tmp/103416384/41425981.10002 
           Reduce Output Operator
             key expressions:
                   expr: 0
@@ -862,6 +862,7 @@
                       type: string
                       expr: value
                       type: string
+                Union
                   Select Operator
                     expressions:
                           expr: 0
@@ -893,6 +894,7 @@
                       type: string
                       expr: value
                       type: string
+                Union
                   Select Operator
                     expressions:
                           expr: 0

Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/input_part7.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/input_part7.q.out?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/input_part7.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/input_part7.q.out Thu Mar 26 01:26:19 2009
@@ -24,6 +24,7 @@
                       type: string
                       expr: hr
                       type: string
+                Union
                   Select Operator
                     expressions:
                           expr: 0
@@ -64,6 +65,7 @@
                       type: string
                       expr: hr
                       type: string
+                Union
                   Select Operator
                     expressions:
                           expr: 0
@@ -91,10 +93,10 @@
                             type: string
       Needs Tagging: false
       Path -> Alias:
-        file:/data/users/suresh/hive_external/build/ql/test/data/warehouse/srcpart/hr=11/ds=2008-04-08 
-        file:/data/users/suresh/hive_external/build/ql/test/data/warehouse/srcpart/hr=12/ds=2008-04-08 
+        file:/data/users/njain/hive2/hive/build/ql/test/data/warehouse/srcpart/hr=11/ds=2008-04-08 
+        file:/data/users/njain/hive2/hive/build/ql/test/data/warehouse/srcpart/hr=12/ds=2008-04-08 
       Path -> Partition:
-        file:/data/users/suresh/hive_external/build/ql/test/data/warehouse/srcpart/hr=11/ds=2008-04-08 
+        file:/data/users/njain/hive2/hive/build/ql/test/data/warehouse/srcpart/hr=11/ds=2008-04-08 
           Partition
             partition values:
               ds 2008-04-08
@@ -113,10 +115,10 @@
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 file.inputformat org.apache.hadoop.mapred.TextInputFormat
                 file.outputformat org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
-                location file:/data/users/suresh/hive_external/build/ql/test/data/warehouse/srcpart
+                location file:/data/users/njain/hive2/hive/build/ql/test/data/warehouse/srcpart
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: srcpart
-        file:/data/users/suresh/hive_external/build/ql/test/data/warehouse/srcpart/hr=12/ds=2008-04-08 
+        file:/data/users/njain/hive2/hive/build/ql/test/data/warehouse/srcpart/hr=12/ds=2008-04-08 
           Partition
             partition values:
               ds 2008-04-08
@@ -135,7 +137,7 @@
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 file.inputformat org.apache.hadoop.mapred.TextInputFormat
                 file.outputformat org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
-                location file:/data/users/suresh/hive_external/build/ql/test/data/warehouse/srcpart
+                location file:/data/users/njain/hive2/hive/build/ql/test/data/warehouse/srcpart
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: srcpart
       Reduce Operator Tree:
@@ -143,7 +145,7 @@
           File Output Operator
             compressed: false
             GlobalTableId: 0
-            directory: /data/users/suresh/hive_external/ql/../build/ql/tmp/1565954774/21907520.10001.insclause-0
+            directory: /data/users/njain/hive2/hive/ql/../build/ql/tmp/193440808/125134681.10001.insclause-0
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat

Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/union.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/union.q.out?rev=758494&r1=758493&r2=758494&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/union.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/union.q.out Thu Mar 26 01:26:19 2009
@@ -20,6 +20,7 @@
                       type: string
                       expr: value
                       type: string
+                Union
                   Select Operator
                     expressions:
                           expr: 0
@@ -43,6 +44,7 @@
                       type: string
                       expr: value
                       type: string
+                Union
                   Select Operator
                     expressions:
                           expr: 0


