hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From xu...@apache.org
Subject svn commit: r1638907 [1/3] - in /hive/branches/spark/ql/src: java/org/apache/hadoop/hive/ql/exec/spark/ java/org/apache/hadoop/hive/ql/optimizer/spark/ java/org/apache/hadoop/hive/ql/parse/spark/ java/org/apache/hadoop/hive/ql/plan/ test/results/client...
Date Wed, 12 Nov 2014 18:07:22 GMT
Author: xuefu
Date: Wed Nov 12 18:07:16 2014
New Revision: 1638907

URL: http://svn.apache.org/r1638907
Log:
HIVE-8793: Refactor to make splitting SparkWork a physical resolver [Spark Branch] (Rui via Xuefu)

Added:
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java
Modified:
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby10.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby11.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby9.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_position.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/input1_limit.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/insert_into3.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java Wed Nov 12 18:07:16 2014
@@ -18,23 +18,15 @@
 
 package org.apache.hadoop.hive.ql.exec.spark;
 
-import java.util.ArrayList;
 import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
-import java.util.Queue;
-import java.util.Set;
 
 import com.google.common.base.Preconditions;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.io.merge.MergeFileMapper;
 import org.apache.hadoop.hive.ql.io.merge.MergeFileOutputFormat;
 import org.apache.hadoop.hive.ql.io.merge.MergeFileWork;
@@ -70,7 +62,7 @@ public class SparkPlanGenerator {
   private Context context;
   private Path scratchDir;
   private SparkReporter sparkReporter;
-  private final Map<BaseWork, BaseWork> cloneToWork;
+  private Map<BaseWork, BaseWork> cloneToWork;
   private final Map<BaseWork, SparkTran> workToTranMap;
   private final Map<BaseWork, SparkTran> workToParentWorkTranMap;
 
@@ -85,7 +77,6 @@ public class SparkPlanGenerator {
     this.context = context;
     this.jobConf = jobConf;
     this.scratchDir = scratchDir;
-    this.cloneToWork = new HashMap<BaseWork, BaseWork>();
     this.workToTranMap = new HashMap<BaseWork, SparkTran>();
     this.workToParentWorkTranMap = new HashMap<BaseWork, SparkTran>();
     this.sparkReporter = sparkReporter;
@@ -93,12 +84,10 @@ public class SparkPlanGenerator {
 
   public SparkPlan generate(SparkWork sparkWork) throws Exception {
     SparkPlan sparkPlan = new SparkPlan();
-    cloneToWork.clear();
+    cloneToWork = sparkWork.getCloneToWork();
     workToTranMap.clear();
     workToParentWorkTranMap.clear();
 
-    splitSparkWork(sparkWork);
-
     for (BaseWork work : sparkWork.getAllWork()) {
       SparkTran tran;
       if (work instanceof MapWork) {
@@ -159,105 +148,6 @@ public class SparkPlanGenerator {
     return result;
   }
 
-
-  private void splitSparkWork(SparkWork sparkWork) {
-    // do a BFS on the sparkWork graph, and look for any work that has more than one child.
-    // If we found such a work, we split it into multiple ones, one for each of its child.
-    Queue<BaseWork> queue = new LinkedList<BaseWork>();
-    Set<BaseWork> visited = new HashSet<BaseWork>();
-    queue.addAll(sparkWork.getRoots());
-    while (!queue.isEmpty()) {
-      BaseWork work = queue.poll();
-      if (!visited.add(work)) {
-        continue;
-      }
-
-      List<BaseWork> childWorks = sparkWork.getChildren(work);
-      // First, add all children of this work into queue, to be processed later.
-      for (BaseWork w : childWorks) {
-        queue.add(w);
-      }
-
-      // Second, check if this work has multiple reduceSinks. If so, do split.
-      splitBaseWork(sparkWork, work, childWorks);
-    }
-  }
-
-  private Set<Operator<?>> getAllReduceSinks(BaseWork work) {
-    Set<Operator<?>> resultSet = work.getAllLeafOperators();
-    Iterator<Operator<?>> it = resultSet.iterator();
-    while (it.hasNext()) {
-      if (!(it.next() instanceof ReduceSinkOperator)) {
-        it.remove();
-      }
-    }
-    return resultSet;
-  }
-
-  // Split work into multiple branches, one for each childWork in childWorks.
-  // It also set up the connection between each parent work and child work.
-  private void splitBaseWork(SparkWork sparkWork, BaseWork parentWork, List<BaseWork> childWorks) {
-    if (getAllReduceSinks(parentWork).size() <= 1) {
-      return;
-    }
-
-    // Grand-parent works - we need to set these to be the parents of the cloned works.
-    List<BaseWork> grandParentWorks = sparkWork.getParents(parentWork);
-    boolean isFirst = true;
-
-    for (BaseWork childWork : childWorks) {
-      BaseWork clonedParentWork = Utilities.cloneBaseWork(parentWork);
-      String childReducerName = childWork.getName();
-      SparkEdgeProperty clonedEdgeProperty = sparkWork.getEdgeProperty(parentWork, childWork);
-
-      // We need to remove those branches that
-      // 1, ended with a ReduceSinkOperator, and
-      // 2, the ReduceSinkOperator's name is not the same as childReducerName.
-      // Also, if the cloned work is not the first, we remove ALL leaf operators except
-      // the corresponding ReduceSinkOperator.
-      for (Operator<?> op : clonedParentWork.getAllLeafOperators()) {
-        if (op instanceof ReduceSinkOperator) {
-          if (!((ReduceSinkOperator)op).getConf().getOutputName().equals(childReducerName)) {
-            removeOpRecursive(op);
-          }
-        } else if (!isFirst) {
-          removeOpRecursive(op);
-        }
-      }
-
-      isFirst = false;
-
-      // Then, we need to set up the graph connection. Especially:
-      // 1, we need to connect this cloned parent work with all the grand-parent works.
-      // 2, we need to connect this cloned parent work with the corresponding child work.
-      sparkWork.add(clonedParentWork);
-      for (BaseWork gpw : grandParentWorks) {
-        sparkWork.connect(gpw, clonedParentWork, sparkWork.getEdgeProperty(gpw, parentWork));
-      }
-      sparkWork.connect(clonedParentWork, childWork, clonedEdgeProperty);
-      cloneToWork.put(clonedParentWork, parentWork);
-    }
-
-    sparkWork.remove(parentWork);
-  }
-
-  // Remove op from all its parents' child list.
-  // Recursively remove any of its parent who only have this op as child.
-  private void removeOpRecursive(Operator<?> operator) {
-    List<Operator<?>> parentOperators = new ArrayList<Operator<?>>();
-    for (Operator<?> op : operator.getParentOperators()) {
-      parentOperators.add(op);
-    }
-    for (Operator<?> parentOperator : parentOperators) {
-      Preconditions.checkArgument(parentOperator.getChildOperators().contains(operator),
-          "AssertionError: parent of " + operator.getName() + " doesn't have it as child.");
-      parentOperator.removeChild(operator);
-      if (parentOperator.getNumChild() == 0) {
-        removeOpRecursive(parentOperator);
-      }
-    }
-  }
-
   private Class getInputFormat(JobConf jobConf, MapWork mWork) throws HiveException {
     // MergeFileWork is sub-class of MapWork, we don't need to distinguish here
     if (mWork.getInputformat() != null) {

Added: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java?rev=1638907&view=auto
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java (added)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java Wed Nov 12 18:07:16 2014
@@ -0,0 +1,185 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.spark;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
+import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
+import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalPlanResolver;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.parse.spark.GenSparkUtils;
+import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceWork;
+import org.apache.hadoop.hive.ql.plan.SparkEdgeProperty;
+import org.apache.hadoop.hive.ql.plan.SparkWork;
+
+import java.io.Serializable;
+import java.util.*;
+
+/**
+ * Do a BFS on the sparkWork graph, and look for any work that has more than one child.
+ * If we find such a work, we split it into multiple ones, one for each of its children.
+ */
+public class SplitSparkWorkResolver implements PhysicalPlanResolver {
+  @Override
+  public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
+    for (Task<? extends Serializable> task : pctx.getRootTasks()) {
+      if (task instanceof SparkTask) {
+        splitSparkWork(((SparkTask) task).getWork());
+      }
+    }
+    return pctx;
+  }
+
+  private void splitSparkWork(SparkWork sparkWork) {
+    Queue<BaseWork> queue = new LinkedList<BaseWork>();
+    Set<BaseWork> visited = new HashSet<BaseWork>();
+    queue.addAll(sparkWork.getRoots());
+    while (!queue.isEmpty()) {
+      BaseWork work = queue.poll();
+      if (!visited.add(work)) {
+        continue;
+      }
+
+      List<BaseWork> childWorks = sparkWork.getChildren(work);
+      // First, add all children of this work into queue, to be processed later.
+      for (BaseWork w : childWorks) {
+        queue.add(w);
+      }
+
+      // Second, check if this work has multiple reduceSinks. If so, do split.
+      splitBaseWork(sparkWork, work, childWorks);
+    }
+  }
+
+  // Split work into multiple branches, one for each childWork in childWorks.
+// It also sets up the connection between each parent work and child work.
+  private void splitBaseWork(SparkWork sparkWork, BaseWork parentWork, List<BaseWork> childWorks) {
+    if (getAllReduceSinks(parentWork).size() <= 1) {
+      return;
+    }
+
+    // Grand-parent works - we need to set these to be the parents of the cloned works.
+    List<BaseWork> grandParentWorks = sparkWork.getParents(parentWork);
+    boolean isFirst = true;
+
+    for (BaseWork childWork : childWorks) {
+      BaseWork clonedParentWork = Utilities.cloneBaseWork(parentWork);
+      // give the cloned work a different name
+      clonedParentWork.setName(clonedParentWork.getName().replaceAll("^([a-zA-Z]+)(\\s+)(\\d+)",
+          "$1$2" + GenSparkUtils.getUtils().getNextSeqNumber()));
+      setStatistics(parentWork, clonedParentWork);
+      String childReducerName = childWork.getName();
+      SparkEdgeProperty clonedEdgeProperty = sparkWork.getEdgeProperty(parentWork, childWork);
+
+      // We need to remove those branches that
+      // 1, end with a ReduceSinkOperator, and
+      // 2, whose ReduceSinkOperator's name is not the same as childReducerName.
+      // Also, if the cloned work is not the first, we remove ALL leaf operators except
+      // the corresponding ReduceSinkOperator.
+      for (Operator<?> op : clonedParentWork.getAllLeafOperators()) {
+        if (op instanceof ReduceSinkOperator) {
+          if (!((ReduceSinkOperator) op).getConf().getOutputName().equals(childReducerName)) {
+            removeOpRecursive(op);
+          }
+        } else if (!isFirst) {
+          removeOpRecursive(op);
+        }
+      }
+
+      isFirst = false;
+
+      // Then, we need to set up the graph connection. Especially:
+      // 1, we need to connect this cloned parent work with all the grand-parent works.
+      // 2, we need to connect this cloned parent work with the corresponding child work.
+      sparkWork.add(clonedParentWork);
+      for (BaseWork gpw : grandParentWorks) {
+        sparkWork.connect(gpw, clonedParentWork, sparkWork.getEdgeProperty(gpw, parentWork));
+      }
+      sparkWork.connect(clonedParentWork, childWork, clonedEdgeProperty);
+      sparkWork.getCloneToWork().put(clonedParentWork, parentWork);
+    }
+
+    sparkWork.remove(parentWork);
+  }
+
+  private Set<Operator<?>> getAllReduceSinks(BaseWork work) {
+    Set<Operator<?>> resultSet = work.getAllLeafOperators();
+    Iterator<Operator<?>> it = resultSet.iterator();
+    while (it.hasNext()) {
+      if (!(it.next() instanceof ReduceSinkOperator)) {
+        it.remove();
+      }
+    }
+    return resultSet;
+  }
+
+// Remove op from all its parents' child lists.
+// Recursively remove any parent that only has this op as a child.
+  private void removeOpRecursive(Operator<?> operator) {
+    List<Operator<?>> parentOperators = new ArrayList<Operator<?>>();
+    for (Operator<?> op : operator.getParentOperators()) {
+      parentOperators.add(op);
+    }
+    for (Operator<?> parentOperator : parentOperators) {
+      Preconditions.checkArgument(parentOperator.getChildOperators().contains(operator),
+          "AssertionError: parent of " + operator.getName() + " doesn't have it as child.");
+      parentOperator.removeChild(operator);
+      if (parentOperator.getNumChild() == 0) {
+        removeOpRecursive(parentOperator);
+      }
+    }
+  }
+
+// We lost statistics & opTraits through cloning; try to get them back.
+  // TODO: make sure this method is sufficient to solve the problem
+  private void setStatistics(BaseWork origin, BaseWork clone) {
+    if (origin instanceof MapWork && clone instanceof MapWork) {
+      MapWork originMW = (MapWork) origin;
+      MapWork cloneMW = (MapWork) clone;
+      for (Map.Entry<String, Operator<? extends OperatorDesc>> entry :
+          originMW.getAliasToWork().entrySet()) {
+        String alias = entry.getKey();
+        Operator<? extends OperatorDesc> cloneOP = cloneMW.getAliasToWork().get(alias);
+        if (cloneOP != null) {
+          setStatistics(entry.getValue(), cloneOP);
+        }
+      }
+    } else if (origin instanceof ReduceWork && clone instanceof ReduceWork) {
+      setStatistics(((ReduceWork) origin).getReducer(), ((ReduceWork) clone).getReducer());
+    }
+  }
+
+  private void setStatistics(Operator<? extends OperatorDesc> origin,
+      Operator<? extends OperatorDesc> clone) {
+    clone.getConf().setStatistics(origin.getConf().getStatistics());
+    clone.getConf().setOpTraits(origin.getConf().getOpTraits());
+    if (origin.getChildOperators().size() == clone.getChildOperators().size()) {
+      for (int i = 0; i < clone.getChildOperators().size(); i++) {
+        setStatistics(origin.getChildOperators().get(i), clone.getChildOperators().get(i));
+      }
+    }
+  }
+}

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java Wed Nov 12 18:07:16 2014
@@ -410,4 +410,8 @@ public class GenSparkUtils {
     }
     return null;
   }
+
+  public synchronized int getNextSeqNumber() {
+    return ++sequenceNumber;
+  }
 }

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java Wed Nov 12 18:07:16 2014
@@ -62,6 +62,7 @@ import org.apache.hadoop.hive.ql.optimiz
 import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
 import org.apache.hadoop.hive.ql.optimizer.spark.SetSparkReducerParallelism;
 import org.apache.hadoop.hive.ql.optimizer.spark.SparkSortMergeJoinFactory;
+import org.apache.hadoop.hive.ql.optimizer.spark.SplitSparkWorkResolver;
 import org.apache.hadoop.hive.ql.parse.GlobalLimitCtx;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
@@ -262,6 +263,8 @@ public class SparkCompiler extends TaskC
     PhysicalContext physicalCtx = new PhysicalContext(conf, pCtx, pCtx.getContext(), rootTasks,
        pCtx.getFetchTask());
 
+    physicalCtx = new SplitSparkWorkResolver().resolve(physicalCtx);
+
     if (conf.getBoolVar(HiveConf.ConfVars.HIVENULLSCANOPTIMIZE)) {
       physicalCtx = new NullScanOptimizer().resolve(physicalCtx);
     } else {

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java Wed Nov 12 18:07:16 2014
@@ -49,15 +49,20 @@ public class SparkWork extends AbstractO
   private final Set<BaseWork> roots = new HashSet<BaseWork>();
   private final Set<BaseWork> leaves = new HashSet<BaseWork>();
 
-  protected final Map<BaseWork, List<BaseWork>> workGraph = new HashMap<BaseWork, List<BaseWork>>();
-  protected final Map<BaseWork, List<BaseWork>> invertedWorkGraph = new HashMap<BaseWork, List<BaseWork>>();
+  protected final Map<BaseWork, List<BaseWork>> workGraph =
+      new HashMap<BaseWork, List<BaseWork>>();
+  protected final Map<BaseWork, List<BaseWork>> invertedWorkGraph =
+      new HashMap<BaseWork, List<BaseWork>>();
   protected final Map<Pair<BaseWork, BaseWork>, SparkEdgeProperty> edgeProperties =
       new HashMap<Pair<BaseWork, BaseWork>, SparkEdgeProperty>();
 
   private Map<String, List<String>> requiredCounterPrefix;
 
+  private final Map<BaseWork, BaseWork> cloneToWork;
+
   public SparkWork(String name) {
     this.name = name + ":" + (++counter);
+    cloneToWork = new HashMap<BaseWork, BaseWork>();
   }
 
 
@@ -305,20 +310,25 @@ public class SparkWork extends AbstractO
   @Explain(displayName = "Edges")
   public Map<String, List<Dependency>> getDependencyMap() {
     Map<String, List<Dependency>> result = new LinkedHashMap<String, List<Dependency>>();
-    for (Map.Entry<BaseWork, List<BaseWork>> entry: invertedWorkGraph.entrySet()) {
-      List<Dependency> dependencies = new LinkedList<Dependency>();
-      for (BaseWork d: entry.getValue()) {
-        Dependency dependency = new Dependency();
-        dependency.w = d;
-        dependency.prop = getEdgeProperty(d, entry.getKey());
-        dependencies.add(dependency);
-      }
-      if (!dependencies.isEmpty()) {
-        Collections.sort(dependencies);
-        result.put(entry.getKey().getName(), dependencies);
+    for (BaseWork baseWork : getAllWork()) {
+      if (invertedWorkGraph.get(baseWork) != null && invertedWorkGraph.get(baseWork).size() > 0) {
+        List<Dependency> dependencies = new LinkedList<Dependency>();
+        for (BaseWork d : invertedWorkGraph.get(baseWork)) {
+          Dependency dependency = new Dependency();
+          dependency.w = d;
+          dependency.prop = getEdgeProperty(d, baseWork);
+          dependencies.add(dependency);
+        }
+        if (!dependencies.isEmpty()) {
+          Collections.sort(dependencies);
+          result.put(baseWork.getName(), dependencies);
+        }
       }
     }
     return result;
   }
 
+  public Map<BaseWork, BaseWork> getCloneToWork() {
+    return cloneToWork;
+  }
 }

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby10.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby10.q.out?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby10.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby10.q.out Wed Nov 12 18:07:16 2014
@@ -56,9 +56,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
-        Reducer 4 <- Reducer 2 (GROUP, 1)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (SORT, 1)
+        Reducer 6 <- Map 1 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -72,34 +73,6 @@ STAGE PLANS:
                     Map-reduce partition columns: substr(value, 5) (type: string)
                     Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
                     value expressions: key (type: int)
-        Reducer 2 
-            Reduce Operator Tree:
-              Forward
-                Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(KEY._col0), count(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint), _col2 (type: bigint)
-                Group By Operator
-                  aggregations: sum(KEY._col0), sum(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: double), _col2 (type: double)
         Reducer 3 
             Reduce Operator Tree:
               Group By Operator
@@ -140,6 +113,38 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.dest2
+        Reducer 5 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(KEY._col0), count(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: bigint), _col2 (type: bigint)
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(KEY._col0), sum(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: double), _col2 (type: double)
 
   Stage: Stage-3
     Dependency Collection
@@ -268,9 +273,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
-        Reducer 4 <- Reducer 2 (GROUP, 1)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (SORT, 1)
+        Reducer 6 <- Map 1 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -284,34 +290,6 @@ STAGE PLANS:
                     Map-reduce partition columns: substr(value, 5) (type: string)
                     Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
                     value expressions: key (type: int)
-        Reducer 2 
-            Reduce Operator Tree:
-              Forward
-                Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(KEY._col0), count(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint), _col2 (type: bigint)
-                Group By Operator
-                  aggregations: sum(KEY._col0), sum(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: double), _col2 (type: double)
         Reducer 3 
             Reduce Operator Tree:
               Group By Operator
@@ -352,6 +330,38 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.dest2
+        Reducer 5 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(KEY._col0), count(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: bigint), _col2 (type: bigint)
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(KEY._col0), sum(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: double), _col2 (type: double)
 
   Stage: Stage-3
     Dependency Collection
@@ -482,9 +492,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
-        Reducer 4 <- Reducer 2 (GROUP, 1)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (SORT, 1)
+        Reducer 6 <- Map 1 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -498,34 +509,6 @@ STAGE PLANS:
                     Map-reduce partition columns: substr(value, 5) (type: string)
                     Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
                     value expressions: key (type: int)
-        Reducer 2 
-            Reduce Operator Tree:
-              Forward
-                Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: sum(DISTINCT KEY._col0), count(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: double), _col2 (type: bigint)
-                Group By Operator
-                  aggregations: sum(DISTINCT KEY._col0), avg(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: double), _col2 (type: struct<count:bigint,sum:double,input:string>)
         Reducer 3 
             Reduce Operator Tree:
               Group By Operator
@@ -566,6 +549,38 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.dest2
+        Reducer 5 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(DISTINCT KEY._col0), count(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: double), _col2 (type: bigint)
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(DISTINCT KEY._col0), avg(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: double), _col2 (type: struct<count:bigint,sum:double,input:string>)
 
   Stage: Stage-3
     Dependency Collection

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby11.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby11.q.out?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby11.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby11.q.out Wed Nov 12 18:07:16 2014
@@ -44,9 +44,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
-        Reducer 4 <- Reducer 2 (GROUP, 1)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (SORT, 1)
+        Reducer 6 <- Map 1 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -60,34 +61,6 @@ STAGE PLANS:
                     Map-reduce partition columns: key (type: string)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     value expressions: value (type: string), substr(value, 5) (type: string)
-        Reducer 2 
-            Reduce Operator Tree:
-              Forward
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(KEY._col0), count(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint), _col2 (type: bigint)
-                Group By Operator
-                  aggregations: count(KEY._col0), count(DISTINCT KEY._col0)
-                  keys: VALUE._col1 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint), _col2 (type: bigint)
         Reducer 3 
             Reduce Operator Tree:
               Group By Operator
@@ -128,6 +101,38 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.dest2
+        Reducer 5 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(KEY._col0), count(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: bigint), _col2 (type: bigint)
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(KEY._col0), count(DISTINCT KEY._col0)
+                  keys: VALUE._col1 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: bigint), _col2 (type: bigint)
 
   Stage: Stage-3
     Dependency Collection

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map.q.out?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map.q.out Wed Nov 12 18:07:16 2014
@@ -40,11 +40,11 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP, 31)
-        Reducer 3 <- Map 1 (GROUP, 31)
+        Reducer 2 <- Map 4 (GROUP, 31)
+        Reducer 3 <- Map 5 (GROUP, 31)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -65,6 +65,11 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: double)
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: key, value

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out Wed Nov 12 18:07:16 2014
@@ -40,13 +40,13 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+        Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 31)
         Reducer 3 <- Reducer 2 (GROUP, 31)
-        Reducer 4 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+        Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 31)
         Reducer 5 <- Reducer 4 (GROUP, 31)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 6 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -67,6 +67,11 @@ STAGE PLANS:
                         Map-reduce partition columns: rand() (type: double)
                         Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: double)
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: key, value

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out Wed Nov 12 18:07:16 2014
@@ -40,11 +40,11 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP, 31)
-        Reducer 3 <- Map 1 (GROUP, 31)
+        Reducer 2 <- Map 4 (GROUP, 31)
+        Reducer 3 <- Map 5 (GROUP, 31)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -59,6 +59,11 @@ STAGE PLANS:
                       Map-reduce partition columns: key (type: string)
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                       value expressions: substr(value, 5) (type: string)
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: key, value

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out Wed Nov 12 18:07:16 2014
@@ -40,9 +40,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (SORT, 31)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
-        Reducer 4 <- Reducer 2 (GROUP, 1)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (SORT, 31)
+        Reducer 6 <- Map 1 (SORT, 31)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -60,44 +61,6 @@ STAGE PLANS:
                       Map-reduce partition columns: key (type: string)
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                       value expressions: substr(value, 5) (type: string)
-        Reducer 2 
-            Reduce Operator Tree:
-              Forward
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: sum(VALUE._col0)
-                  keys: KEY._col0 (type: string)
-                  mode: complete
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), _col1 (type: double)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                    Limit
-                      Number of rows: 10
-                      Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        sort order: 
-                        Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: string), _col1 (type: double)
-                Group By Operator
-                  aggregations: sum(VALUE._col0)
-                  keys: KEY._col0 (type: string)
-                  mode: complete
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), _col1 (type: double)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                    Limit
-                      Number of rows: 10
-                      Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        sort order: 
-                        Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: string), _col1 (type: double)
         Reducer 3 
             Reduce Operator Tree:
               Select Operator
@@ -140,6 +103,48 @@ STAGE PLANS:
                           output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                           name: default.dest2
+        Reducer 5 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(VALUE._col0)
+                  keys: KEY._col0 (type: string)
+                  mode: complete
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: double)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Limit
+                      Number of rows: 10
+                      Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: string), _col1 (type: double)
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(VALUE._col0)
+                  keys: KEY._col0 (type: string)
+                  mode: complete
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: double)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Limit
+                      Number of rows: 10
+                      Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: string), _col1 (type: double)
 
   Stage: Stage-3
     Dependency Collection

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8.q.out?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8.q.out Wed Nov 12 18:07:16 2014
@@ -40,9 +40,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
-        Reducer 4 <- Reducer 2 (GROUP, 1)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (SORT, 1)
+        Reducer 6 <- Map 1 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -56,34 +57,6 @@ STAGE PLANS:
                     Map-reduce partition columns: substr(value, 5) (type: string)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     value expressions: key (type: string)
-        Reducer 2 
-            Reduce Operator Tree:
-              Forward
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
         Reducer 3 
             Reduce Operator Tree:
               Group By Operator
@@ -124,6 +97,38 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.dest2
+        Reducer 5 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: bigint)
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: bigint)
 
   Stage: Stage-3
     Dependency Collection
@@ -828,9 +833,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
-        Reducer 4 <- Reducer 2 (GROUP, 1)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (SORT, 1)
+        Reducer 6 <- Map 1 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -844,34 +850,6 @@ STAGE PLANS:
                     Map-reduce partition columns: substr(value, 5) (type: string)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     value expressions: key (type: string)
-        Reducer 2 
-            Reduce Operator Tree:
-              Forward
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
         Reducer 3 
             Reduce Operator Tree:
               Group By Operator
@@ -912,6 +890,38 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.dest2
+        Reducer 5 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: bigint)
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: bigint)
 
   Stage: Stage-3
     Dependency Collection

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map.q.out?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map.q.out Wed Nov 12 18:07:16 2014
@@ -40,9 +40,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (SORT, 31)
-        Reducer 3 <- Reducer 2 (GROUP, 31)
-        Reducer 4 <- Reducer 2 (GROUP, 31)
+        Reducer 3 <- Reducer 5 (GROUP, 31)
+        Reducer 4 <- Reducer 6 (GROUP, 31)
+        Reducer 5 <- Map 1 (SORT, 31)
+        Reducer 6 <- Map 1 (SORT, 31)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -56,34 +57,6 @@ STAGE PLANS:
                     Map-reduce partition columns: substr(value, 5) (type: string)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     value expressions: key (type: string)
-        Reducer 2 
-            Reduce Operator Tree:
-              Forward
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
         Reducer 3 
             Reduce Operator Tree:
               Group By Operator
@@ -124,6 +97,38 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.dest2
+        Reducer 5 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: bigint)
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: bigint)
 
   Stage: Stage-3
     Dependency Collection

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out Wed Nov 12 18:07:16 2014
@@ -40,9 +40,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (SORT, 31)
-        Reducer 3 <- Reducer 2 (GROUP, 31)
-        Reducer 4 <- Reducer 2 (GROUP, 31)
+        Reducer 3 <- Reducer 5 (GROUP, 31)
+        Reducer 4 <- Reducer 6 (GROUP, 31)
+        Reducer 5 <- Map 1 (SORT, 31)
+        Reducer 6 <- Map 1 (SORT, 31)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -56,34 +57,6 @@ STAGE PLANS:
                     Map-reduce partition columns: substr(value, 5) (type: string)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     value expressions: key (type: string)
-        Reducer 2 
-            Reduce Operator Tree:
-              Forward
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
         Reducer 3 
             Reduce Operator Tree:
               Group By Operator
@@ -124,6 +97,38 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.dest2
+        Reducer 5 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: bigint)
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: bigint)
 
   Stage: Stage-3
     Dependency Collection

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out Wed Nov 12 18:07:16 2014
@@ -40,9 +40,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (SORT, 31)
-        Reducer 3 <- Reducer 2 (GROUP, 31)
-        Reducer 4 <- Reducer 2 (GROUP, 31)
+        Reducer 3 <- Reducer 5 (GROUP, 31)
+        Reducer 4 <- Reducer 6 (GROUP, 31)
+        Reducer 5 <- Map 1 (SORT, 31)
+        Reducer 6 <- Map 1 (SORT, 31)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -56,34 +57,6 @@ STAGE PLANS:
                     Map-reduce partition columns: substr(value, 5) (type: string)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     value expressions: key (type: string)
-        Reducer 2 
-            Reduce Operator Tree:
-              Forward
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col0)
-                  keys: VALUE._col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
         Reducer 3 
             Reduce Operator Tree:
               Group By Operator
@@ -124,6 +97,38 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.dest2
+        Reducer 5 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: bigint)
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT KEY._col0)
+                  keys: VALUE._col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: bigint)
 
   Stage: Stage-3
     Dependency Collection



Mime
View raw message