hive-commits mailing list archives

From: gunt...@apache.org
Subject: svn commit: r1622783 [3/3] - in /hive/trunk: ./ common/src/java/org/apache/hadoop/hive/conf/ data/files/ hbase-handler/ itests/src/test/resources/ itests/util/src/main/java/org/apache/hadoop/hive/ql/ ql/ ql/if/ ql/src/gen/thrift/gen-cpp/ ql/src/gen/thr...
Date: Fri, 05 Sep 2014 20:16:10 GMT
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java?rev=1622783&r1=1622782&r2=1622783&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java Fri Sep  5 20:16:08 2014
@@ -21,20 +21,24 @@ import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.Deque;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.Stack;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.ql.Context;
+import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator;
 import org.apache.hadoop.hive.ql.exec.ConditionalTask;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
@@ -52,20 +56,25 @@ import org.apache.hadoop.hive.ql.lib.For
 import org.apache.hadoop.hive.ql.lib.GraphWalker;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
-import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.optimizer.ConstantPropagate;
 import org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin;
+import org.apache.hadoop.hive.ql.optimizer.DynamicPartitionPruningOptimization;
 import org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc;
+import org.apache.hadoop.hive.ql.optimizer.RemoveDynamicPruningBySize;
 import org.apache.hadoop.hive.ql.optimizer.SetReducerParallelism;
+import org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.AnnotateWithOpTraits;
 import org.apache.hadoop.hive.ql.optimizer.physical.CrossProductCheck;
 import org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer;
 import org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer;
 import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
-import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
 import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger;
+import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
+import org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics;
 import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc;
 import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.MoveWork;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
@@ -85,7 +94,7 @@ public class TezCompiler extends TaskCom
   @Override
   public void init(HiveConf conf, LogHelper console, Hive db) {
     super.init(conf, console, db);
-    
+
     // Tez requires us to use RPC for the query plan
     HiveConf.setBoolVar(conf, ConfVars.HIVE_RPC_QUERY_PLAN, true);
 
@@ -98,31 +107,203 @@ public class TezCompiler extends TaskCom
   protected void optimizeOperatorPlan(ParseContext pCtx, Set<ReadEntity> inputs,
       Set<WriteEntity> outputs) throws SemanticException {
 
-    // Sequence of TableScan operators to be walked
+    // Create the context for the walker
+    OptimizeTezProcContext procCtx = new OptimizeTezProcContext(conf, pCtx, inputs, outputs);
+
+    // setup dynamic partition pruning where possible
+    runDynamicPartitionPruning(procCtx, inputs, outputs);
+
+    // setup stats in the operator plan
+    runStatsAnnotation(procCtx);
+
+    // run the optimizations that use stats for optimization
+    runStatsDependentOptimizations(procCtx, inputs, outputs);
+
+    // after the stats phase we might have some cyclic dependencies that we need
+    // to take care of.
+    runCycleAnalysisForPartitionPruning(procCtx, inputs, outputs);
+
+  }
+
+  private void runCycleAnalysisForPartitionPruning(OptimizeTezProcContext procCtx,
+      Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
+
+    if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING)) {
+      return;
+    }
+
+    boolean cycleFree = false;
+    while (!cycleFree) {
+      cycleFree = true;
+      Set<Set<Operator<?>>> components = getComponents(procCtx);
+      for (Set<Operator<?>> component : components) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Component: ");
+          for (Operator<?> co : component) {
+            LOG.debug("Operator: " + co.getName() + ", " + co.getIdentifier());
+          }
+        }
+        if (component.size() != 1) {
+          LOG.info("Found cycle in operator plan...");
+          cycleFree = false;
+          removeEventOperator(component);
+        }
+      }
+      LOG.info("Cycle free: " + cycleFree);
+    }
+  }
+
+  private void removeEventOperator(Set<Operator<?>> component) {
+    AppMasterEventOperator victim = null;
+    for (Operator<?> o : component) {
+      if (o instanceof AppMasterEventOperator) {
+        if (victim == null
+            || o.getConf().getStatistics().getDataSize() < victim.getConf().getStatistics()
+                .getDataSize()) {
+          victim = (AppMasterEventOperator) o;
+        }
+      }
+    }
+
+    Operator<?> child = victim;
+    Operator<?> curr = victim;
+
+    while (curr.getChildOperators().size() <= 1) {
+      child = curr;
+      curr = curr.getParentOperators().get(0);
+    }
+
+    // at this point we've found the fork in the op pipeline that has the
+    // pruning as a child plan.
+    LOG.info("Disabling dynamic pruning for: "
+        + ((DynamicPruningEventDesc) victim.getConf()).getTableScan().toString()
+        + ". Needed to break cyclic dependency");
+    curr.removeChild(child);
+  }
+
+  // Tarjan's algo
+  private Set<Set<Operator<?>>> getComponents(OptimizeTezProcContext procCtx) {
     Deque<Operator<?>> deque = new LinkedList<Operator<?>>();
-    deque.addAll(pCtx.getTopOps().values());
+    deque.addAll(procCtx.parseContext.getTopOps().values());
 
-    // Create the context for the walker
-    OptimizeTezProcContext procCtx
-      = new OptimizeTezProcContext(conf, pCtx, inputs, outputs, deque);
+    AtomicInteger index = new AtomicInteger();
+    Map<Operator<?>, Integer> indexes = new HashMap<Operator<?>, Integer>();
+    Map<Operator<?>, Integer> lowLinks = new HashMap<Operator<?>, Integer>();
+    Stack<Operator<?>> nodes = new Stack<Operator<?>>();
+    Set<Set<Operator<?>>> components = new HashSet<Set<Operator<?>>>();
+
+    for (Operator<?> o : deque) {
+      if (!indexes.containsKey(o)) {
+        connect(o, index, nodes, indexes, lowLinks, components);
+      }
+    }
+
+    return components;
+  }
+
+  private void connect(Operator<?> o, AtomicInteger index, Stack<Operator<?>> nodes,
+      Map<Operator<?>, Integer> indexes, Map<Operator<?>, Integer> lowLinks,
+      Set<Set<Operator<?>>> components) {
+
+    indexes.put(o, index.get());
+    lowLinks.put(o, index.get());
+    index.incrementAndGet();
+    nodes.push(o);
+
+    List<Operator<?>> children;
+    if (o instanceof AppMasterEventOperator) {
+      children = new ArrayList<Operator<?>>();
+      children.addAll(o.getChildOperators());
+      TableScanOperator ts = ((DynamicPruningEventDesc) o.getConf()).getTableScan();
+      LOG.debug("Adding special edge: " + o.getName() + " --> " + ts.toString());
+      children.add(ts);
+    } else {
+      children = o.getChildOperators();
+    }
+
+    for (Operator<?> child : children) {
+      if (!indexes.containsKey(child)) {
+        connect(child, index, nodes, indexes, lowLinks, components);
+        lowLinks.put(o, Math.min(lowLinks.get(o), lowLinks.get(child)));
+      } else if (nodes.contains(child)) {
+        lowLinks.put(o, Math.min(lowLinks.get(o), indexes.get(child)));
+      }
+    }
+
+    if (lowLinks.get(o).equals(indexes.get(o))) {
+      Set<Operator<?>> component = new HashSet<Operator<?>>();
+      components.add(component);
+      Operator<?> current;
+      do {
+        current = nodes.pop();
+        component.add(current);
+      } while (current != o);
+    }
+  }
+
+  private void runStatsAnnotation(OptimizeTezProcContext procCtx) throws SemanticException {
+    new AnnotateWithStatistics().transform(procCtx.parseContext);
+    new AnnotateWithOpTraits().transform(procCtx.parseContext);
+  }
+
+  private void runStatsDependentOptimizations(OptimizeTezProcContext procCtx,
+      Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
+
+    // Sequence of TableScan operators to be walked
+    Deque<Operator<?>> deque = new LinkedList<Operator<?>>();
+    deque.addAll(procCtx.parseContext.getTopOps().values());
 
     // create a walker which walks the tree in a DFS manner while maintaining
     // the operator stack.
     Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
-    opRules.put(new RuleRegExp(new String("Set parallelism - ReduceSink"),
+    opRules.put(new RuleRegExp("Set parallelism - ReduceSink",
         ReduceSinkOperator.getOperatorName() + "%"),
         new SetReducerParallelism());
 
-    opRules.put(new RuleRegExp(new String("Convert Join to Map-join"),
+    opRules.put(new RuleRegExp("Convert Join to Map-join",
         JoinOperator.getOperatorName() + "%"), new ConvertJoinMapJoin());
 
+    opRules.put(
+        new RuleRegExp("Remove dynamic pruning by size",
+        AppMasterEventOperator.getOperatorName() + "%"),
+        new RemoveDynamicPruningBySize());
+
     // The dispatcher fires the processor corresponding to the closest matching
     // rule and passes the context along
     Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
     List<Node> topNodes = new ArrayList<Node>();
-    topNodes.addAll(pCtx.getTopOps().values());
+    topNodes.addAll(procCtx.parseContext.getTopOps().values());
+    GraphWalker ogw = new ForwardWalker(disp);
+    ogw.startWalking(topNodes, null);
+  }
+
+  private void runDynamicPartitionPruning(OptimizeTezProcContext procCtx, Set<ReadEntity> inputs,
+      Set<WriteEntity> outputs) throws SemanticException {
+
+    if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING)) {
+      return;
+    }
+
+    // Sequence of TableScan operators to be walked
+    Deque<Operator<?>> deque = new LinkedList<Operator<?>>();
+    deque.addAll(procCtx.parseContext.getTopOps().values());
+
+    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+    opRules.put(
+        new RuleRegExp(new String("Dynamic Partition Pruning"), FilterOperator.getOperatorName()
+            + "%"), new DynamicPartitionPruningOptimization());
+
+    // The dispatcher fires the processor corresponding to the closest matching
+    // rule and passes the context along
+    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
+    List<Node> topNodes = new ArrayList<Node>();
+    topNodes.addAll(procCtx.parseContext.getTopOps().values());
     GraphWalker ogw = new ForwardWalker(disp);
     ogw.startWalking(topNodes, null);
+
+    // need a new run of the constant folding because we might have created lots
+    // of "and true and true" conditions.
+    new ConstantPropagate().transform(procCtx.parseContext);
   }
 
   @Override
@@ -158,19 +339,12 @@ public class TezCompiler extends TaskCom
         new ProcessAnalyzeTable(GenTezUtils.getUtils()));
 
     opRules.put(new RuleRegExp("Remember union",
-        UnionOperator.getOperatorName() + "%"), new NodeProcessor()
-    {
-      @Override
-      public Object process(Node n, Stack<Node> s,
-          NodeProcessorCtx procCtx, Object... os) throws SemanticException {
-        GenTezProcContext context = (GenTezProcContext) procCtx;
-        UnionOperator union = (UnionOperator) n;
-
-        // simply need to remember that we've seen a union.
-        context.currentUnionOperators.add(union);
-        return null;
-      }
-    });
+        UnionOperator.getOperatorName() + "%"),
+        new UnionProcessor());
+
+    opRules.put(new RuleRegExp("AppMasterEventOperator",
+        AppMasterEventOperator.getOperatorName() + "%"),
+        new AppMasterEventProcessor());
 
     // The dispatcher fires the processor corresponding to the closest matching
     // rule and passes the context along
@@ -185,10 +359,17 @@ public class TezCompiler extends TaskCom
       GenTezUtils.getUtils().removeUnionOperators(conf, procCtx, w);
     }
 
-    // finally make sure the file sink operators are set up right
+    // then we make sure the file sink operators are set up right
     for (FileSinkOperator fileSink: procCtx.fileSinkSet) {
       GenTezUtils.getUtils().processFileSink(procCtx, fileSink);
     }
+
+    // and finally we hook up any events that need to be sent to the tez AM
+    LOG.debug("There are " + procCtx.eventOperatorSet.size() + " app master events.");
+    for (AppMasterEventOperator event : procCtx.eventOperatorSet) {
+      LOG.debug("Handling AppMasterEventOperator: " + event);
+      GenTezUtils.getUtils().processAppMasterEvent(procCtx, event);
+    }
   }
 
   @Override

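For context, the cycle analysis added to TezCompiler above is Tarjan's strongly-connected-components algorithm over the operator graph, with one synthetic edge added from each AppMasterEventOperator back to the TableScanOperator it prunes. Any component with more than one member is a cycle, and removeEventOperator() breaks it by disconnecting the event operator with the smallest estimated data size. Below is a minimal, self-contained sketch of the same index/low-link/stack bookkeeping over a generic string-labeled graph; the node labels and example edges are illustrative only, not part of this commit.

import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Collections;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;

// Minimal Tarjan's SCC sketch mirroring the index/lowLink/stack bookkeeping
// in TezCompiler.connect(); String labels stand in for Operator<?> nodes.
public class TarjanSketch {
  private final Map<String, List<String>> adj;
  private final Map<String, Integer> indexes = new HashMap<String, Integer>();
  private final Map<String, Integer> lowLinks = new HashMap<String, Integer>();
  private final Deque<String> stack = new ArrayDeque<String>();
  private final Set<Set<String>> components = new HashSet<Set<String>>();
  private final AtomicInteger index = new AtomicInteger();

  TarjanSketch(Map<String, List<String>> adj) {
    this.adj = adj;
  }

  Set<Set<String>> findComponents() {
    for (String node : adj.keySet()) {
      if (!indexes.containsKey(node)) {
        connect(node);
      }
    }
    return components;
  }

  private void connect(String node) {
    indexes.put(node, index.get());
    lowLinks.put(node, index.getAndIncrement());
    stack.push(node);

    List<String> children =
        adj.containsKey(node) ? adj.get(node) : Collections.<String>emptyList();
    for (String child : children) {
      if (!indexes.containsKey(child)) {
        connect(child);
        lowLinks.put(node, Math.min(lowLinks.get(node), lowLinks.get(child)));
      } else if (stack.contains(child)) {
        // back edge to a node still on the stack: same component
        lowLinks.put(node, Math.min(lowLinks.get(node), indexes.get(child)));
      }
    }

    if (lowLinks.get(node).equals(indexes.get(node))) {
      // node is the root of a component: pop everything above it
      Set<String> component = new HashSet<String>();
      String current;
      do {
        current = stack.pop();
        component.add(current);
      } while (!current.equals(node));
      components.add(component);
    }
  }

  public static void main(String[] args) {
    // Illustrative graph: the synthetic EVENT -> TS pruning edge closes a cycle.
    Map<String, List<String>> g = new HashMap<String, List<String>>();
    g.put("TS", Arrays.asList("FIL"));
    g.put("FIL", Arrays.asList("EVENT"));
    g.put("EVENT", Arrays.asList("TS"));
    for (Set<String> c : new TarjanSketch(g).findComponents()) {
      System.out.println(c + (c.size() > 1 ? "  <-- cycle" : ""));
    }
  }
}

The example prints one component of size three, which is exactly the situation the loop in runCycleAnalysisForPartitionPruning() detects and repairs.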
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java?rev=1622783&r1=1622782&r2=1622783&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java Fri Sep  5 20:16:08 2014
@@ -26,9 +26,9 @@ import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
 import java.util.List;
-import java.util.Set;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Set;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -118,6 +118,14 @@ public class MapWork extends BaseWork {
 
   private boolean dummyTableScan = false;
 
+  // used for dynamic partitioning
+  private Map<String, List<TableDesc>> eventSourceTableDescMap =
+      new LinkedHashMap<String, List<TableDesc>>();
+  private Map<String, List<String>> eventSourceColumnNameMap =
+      new LinkedHashMap<String, List<String>>();
+  private Map<String, List<ExprNodeDesc>> eventSourcePartKeyExprMap =
+      new LinkedHashMap<String, List<ExprNodeDesc>>();
+
   public MapWork() {}
 
   public MapWork(String name) {
@@ -535,4 +543,28 @@ public class MapWork extends BaseWork {
   public boolean getDummyTableScan() {
     return dummyTableScan;
   }
+
+  public void setEventSourceTableDescMap(Map<String, List<TableDesc>> map) {
+    this.eventSourceTableDescMap = map;
+  }
+
+  public Map<String, List<TableDesc>> getEventSourceTableDescMap() {
+    return eventSourceTableDescMap;
+  }
+
+  public void setEventSourceColumnNameMap(Map<String, List<String>> map) {
+    this.eventSourceColumnNameMap = map;
+  }
+
+  public Map<String, List<String>> getEventSourceColumnNameMap() {
+    return eventSourceColumnNameMap;
+  }
+
+  public Map<String, List<ExprNodeDesc>> getEventSourcePartKeyExprMap() {
+    return eventSourcePartKeyExprMap;
+  }
+
+  public void setEventSourcePartKeyExprMap(Map<String, List<ExprNodeDesc>> map) {
+    this.eventSourcePartKeyExprMap = map;
+  }
 }

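The three maps added to MapWork above are keyed the same way, by the name of the work unit that produces the pruning events, so the consuming map task can match up each event's payload schema (TableDesc), target partition column, and partition-key expression. A hedged sketch of wiring one source into a MapWork follows; the helper class, the vertex-name key, and the replace-rather-than-merge behavior are assumptions for illustration, not code from this commit.

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.TableDesc;

// Hypothetical helper, not part of the commit: registers a single pruning
// source (e.g. the vertex "Map 1") on the consuming MapWork. All three maps
// share the source name as key so the consumer can line up event payload
// schema, target column, and partition-key expression. Note this replaces
// any existing maps; real planning code would merge into them instead.
class EventSourceWiring {
  static void register(MapWork target, String sourceVertexName,
      TableDesc keyTable, String columnName, ExprNodeDesc partKeyExpr) {
    Map<String, List<TableDesc>> tables = new LinkedHashMap<String, List<TableDesc>>();
    tables.put(sourceVertexName, Arrays.asList(keyTable));
    target.setEventSourceTableDescMap(tables);

    Map<String, List<String>> columns = new LinkedHashMap<String, List<String>>();
    columns.put(sourceVertexName, Arrays.asList(columnName));
    target.setEventSourceColumnNameMap(columns);

    Map<String, List<ExprNodeDesc>> exprs = new LinkedHashMap<String, List<ExprNodeDesc>>();
    exprs.put(sourceVertexName, Arrays.asList(partKeyExpr));
    target.setEventSourcePartKeyExprMap(exprs);
  }
}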
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java?rev=1622783&r1=1622782&r2=1622783&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java Fri Sep  5 20:16:08 2014
@@ -20,8 +20,8 @@ package org.apache.hadoop.hive.ql.plan;
 
 import java.util.ArrayList;
 import java.util.HashMap;
-import java.util.LinkedHashSet;
 import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -32,13 +32,9 @@ import org.apache.hadoop.hive.ql.exec.Fi
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorUtils;
 import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.lib.NodeProcessor;
-import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.serde2.Deserializer;
-import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.SerDeUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.mapred.JobConf;
@@ -99,7 +95,7 @@ public class ReduceWork extends BaseWork
   private ObjectInspector keyObjectInspector = null;
   private ObjectInspector valueObjectInspector = null;
 
-  private Map<String, Integer> reduceColumnNameMap = new LinkedHashMap<String, Integer>();
+  private final Map<String, Integer> reduceColumnNameMap = new LinkedHashMap<String, Integer>();
 
   /**
   * If the plan has a reducer and correspondingly a reduce-sink, then store the TableDesc pointing
@@ -118,7 +114,7 @@ public class ReduceWork extends BaseWork
   private ObjectInspector getObjectInspector(TableDesc desc) {
     ObjectInspector objectInspector;
     try {
-      Deserializer deserializer = (SerDe) ReflectionUtils.newInstance(desc
+      Deserializer deserializer = ReflectionUtils.newInstance(desc
                 .getDeserializerClass(), null);
       SerDeUtils.initializeSerDe(deserializer, null, desc.getProperties(), null);
       objectInspector = deserializer.getObjectInspector();
@@ -239,7 +235,6 @@ public class ReduceWork extends BaseWork
 
   @Override
   public void replaceRoots(Map<Operator<?>, Operator<?>> replacementMap) {
-    assert replacementMap.size() == 1;
     setReducer(replacementMap.get(getReducer()));
   }
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java?rev=1622783&r1=1622782&r2=1622783&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java Fri Sep  5 20:16:08 2014
@@ -906,11 +906,8 @@ public final class OpProcFactory {
     }
 
     ExprNodeDesc condn = ExprNodeDescUtils.mergePredicates(preds);
-    if(!(condn instanceof ExprNodeGenericFuncDesc)) {
-      return null;
-    }
 
-    if (op instanceof TableScanOperator) {
+    if (op instanceof TableScanOperator && condn instanceof ExprNodeGenericFuncDesc) {
       boolean pushFilterToStorage;
       HiveConf hiveConf = owi.getParseContext().getConf();
       pushFilterToStorage =

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java?rev=1622783&r1=1622782&r2=1622783&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java Fri Sep  5 20:16:08 2014
@@ -33,7 +33,6 @@ import java.util.ArrayList;
 import java.util.LinkedHashMap;
 import java.util.LinkedList;
 import java.util.List;
-import java.util.Map;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -54,7 +53,7 @@ import org.apache.hadoop.hive.ql.session
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.yarn.api.records.LocalResource;
 import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.tez.client.TezSession;
+import org.apache.tez.client.TezClient;
 import org.apache.tez.dag.api.DAG;
 import org.apache.tez.dag.api.Edge;
 import org.apache.tez.dag.api.EdgeProperty;
@@ -75,7 +74,7 @@ public class TestTezTask {
   ReduceWork[] rws;
   TezWork work;
   TezTask task;
-  TezSession session;
+  TezClient session;
   TezSessionState sessionState;
   JobConf conf;
   LocalResource appLr;
@@ -97,18 +96,18 @@ public class TestTezTask {
           @Override
           public Vertex answer(InvocationOnMock invocation) throws Throwable {
             Object[] args = invocation.getArguments();
-            return new Vertex(((BaseWork)args[1]).getName(),
+            return Vertex.create(((BaseWork)args[1]).getName(),
                 mock(ProcessorDescriptor.class), 0, mock(Resource.class));
           }
         });
 
-    when(utils.createEdge(any(JobConf.class), any(Vertex.class), any(JobConf.class),
+    when(utils.createEdge(any(JobConf.class), any(Vertex.class),
         any(Vertex.class), any(TezEdgeProperty.class))).thenAnswer(new Answer<Edge>() {
 
           @Override
           public Edge answer(InvocationOnMock invocation) throws Throwable {
             Object[] args = invocation.getArguments();
-            return new Edge((Vertex)args[1], (Vertex)args[3], mock(EdgeProperty.class));
+            return Edge.create((Vertex)args[1], (Vertex)args[2], mock(EdgeProperty.class));
           }
         });
 
@@ -158,10 +157,10 @@ public class TestTezTask {
     appLr = mock(LocalResource.class);
 
     SessionState.start(new HiveConf());
-    session = mock(TezSession.class);
+    session = mock(TezClient.class);
     sessionState = mock(TezSessionState.class);
     when(sessionState.getSession()).thenReturn(session);
-    when(session.submitDAG(any(DAG.class), any(Map.class)))
+    when(session.submitDAG(any(DAG.class)))
       .thenThrow(new SessionNotRunning(""))
       .thenReturn(mock(DAGClient.class));
   }
@@ -186,7 +185,7 @@ public class TestTezTask {
       for (BaseWork x: work.getChildren(w)) {
         boolean found = false;
         for (Vertex u: outs) {
-          if (u.getVertexName().equals(x.getName())) {
+          if (u.getName().equals(x.getName())) {
             found = true;
             break;
           }
@@ -204,12 +203,12 @@ public class TestTezTask {
 
   @Test
   public void testSubmit() throws Exception {
-    DAG dag = new DAG("test");
+    DAG dag = DAG.create("test");
     task.submit(conf, dag, path, appLr, sessionState, new LinkedList());
     // validate close/reopen
     verify(sessionState, times(1)).open(any(HiveConf.class));
     verify(sessionState, times(1)).close(eq(false));  // now uses pool after HIVE-7043
-    verify(session, times(2)).submitDAG(any(DAG.class), any(Map.class));
+    verify(session, times(2)).submitDAG(any(DAG.class));
   }
 
   @Test

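The TestTezTask churn above tracks the Tez 0.5 API rework: TezSession is replaced by TezClient, the DAG/Vertex/Edge constructors become static create() factories, submitDAG() loses its resource-map argument, and Vertex.getVertexName() becomes getName(). A rough sketch of the new-style client flow, assuming Tez 0.5.x signatures rather than quoting this commit:

import org.apache.tez.client.TezClient;
import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.TezConfiguration;

// Sketch of the Tez 0.5-style flow the updated test mocks. Vertices and edges
// go through static factories the same way:
//   Vertex.create(name, processorDescriptor, parallelism, resource)
//   Edge.create(producerVertex, consumerVertex, edgeProperty)
public class TezSubmitSketch {
  public static void main(String[] args) throws Exception {
    TezConfiguration tezConf = new TezConfiguration();
    TezClient client = TezClient.create("sketch", tezConf); // was: new TezSession(...)
    client.start();
    try {
      DAG dag = DAG.create("test");  // was: new DAG("test")
      // a real DAG needs at least one Vertex added via dag.addVertex(...)
      client.submitDAG(dag);         // was: submitDAG(dag, resourceMap)
    } finally {
      client.stop();
    }
  }
}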
Modified: hive/trunk/ql/src/test/results/clientpositive/optimize_nullscan.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/optimize_nullscan.q.out?rev=1622783&r1=1622782&r2=1622783&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/optimize_nullscan.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/optimize_nullscan.q.out Fri Sep  5 20:16:08 2014 differ

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/bucket2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/bucket2.q.out?rev=1622783&r1=1622782&r2=1622783&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/tez/bucket2.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/tez/bucket2.q.out Fri Sep  5 20:16:08 2014 differ

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/bucket3.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/bucket3.q.out?rev=1622783&r1=1622782&r2=1622783&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/tez/bucket3.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/tez/bucket3.q.out Fri Sep  5 20:16:08 2014 differ

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/bucket4.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/bucket4.q.out?rev=1622783&r1=1622782&r2=1622783&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/tez/bucket4.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/tez/bucket4.q.out Fri Sep  5 20:16:08 2014 differ

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out?rev=1622783&r1=1622782&r2=1622783&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out Fri Sep  5 20:16:08 2014
@@ -83,7 +83,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Map Join MAPJOIN[16][bigTable=a] in task 'Map 3' is a cross product
+Warning: Map Join MAPJOIN[18][bigTable=a] in task 'Map 3' is a cross product
 PREHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A
@@ -171,7 +171,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Map Join MAPJOIN[23][bigTable=a] in task 'Map 4' is a cross product
+Warning: Map Join MAPJOIN[25][bigTable=a] in task 'Map 4' is a cross product
 PREHOOK: query: explain select * from A join 
          (select d1.key 
           from B d1 join B d2 on d1.key = d2.key 
@@ -396,7 +396,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Reducer 5' is a cross product
+Warning: Map Join MAPJOIN[30][bigTable=?] in task 'Reducer 5' is a cross product
 PREHOOK: query: explain select * from 
 (select A.key from A group by key) ss join 
 (select d1.key from B d1 join B d2 on d1.key = d2.key where 1 = 1 group by d1.key) od1

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/metadataonly1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/metadataonly1.q.out?rev=1622783&r1=1622782&r2=1622783&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/tez/metadataonly1.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/tez/metadataonly1.q.out Fri Sep  5 20:16:08 2014 differ

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out?rev=1622783&r1=1622782&r2=1622783&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out Fri Sep  5 20:16:08 2014 differ

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/select_dummy_source.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/select_dummy_source.q.out?rev=1622783&r1=1622782&r2=1622783&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/select_dummy_source.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/select_dummy_source.q.out Fri Sep  5 20:16:08 2014
@@ -15,11 +15,9 @@ STAGE PLANS:
         TableScan
           alias: _dummy_table
           Row Limit Per Split: 1
-          Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
           Select Operator
             expressions: 'a' (type: string), 100 (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
             ListSink
 
 PREHOOK: query: select 'a', 100
@@ -50,11 +48,9 @@ STAGE PLANS:
         TableScan
           alias: _dummy_table
           Row Limit Per Split: 1
-          Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
           Select Operator
             expressions: 2 (type: int)
             outputColumnNames: _col0
-            Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
             ListSink
 
 PREHOOK: query: select 1 + 1
@@ -137,11 +133,9 @@ STAGE PLANS:
         TableScan
           alias: _dummy_table
           Row Limit Per Split: 1
-          Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
           Select Operator
             expressions: 'a' (type: string), 100 (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
             ListSink
 
 PREHOOK: query: select 'a', 100
@@ -170,11 +164,9 @@ STAGE PLANS:
         TableScan
           alias: _dummy_table
           Row Limit Per Split: 1
-          Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
           Select Operator
             expressions: 2 (type: int)
             outputColumnNames: _col0
-            Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
             ListSink
 
 PREHOOK: query: select 1 + 1
@@ -262,7 +254,7 @@ STAGE PLANS:
                   Row Limit Per Split: 1
                   Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
                   Select Operator
-                    expressions: 5 (type: int), (1 + 2) (type: int)
+                    expressions: 5 (type: int), 3 (type: int)
                     outputColumnNames: _col0, _col1
                     Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
                     File Output Operator

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/temp_table.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/temp_table.q.out?rev=1622783&r1=1622782&r2=1622783&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/temp_table.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/temp_table.q.out Fri Sep  5 20:16:08 2014
@@ -168,14 +168,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: foo
-          Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: key (type: string), value (type: string)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE
             Limit
               Number of rows: 10
-              Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
               ListSink
 
 PREHOOK: query: select * from foo limit 10

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/vector_string_concat.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/vector_string_concat.q.out?rev=1622783&r1=1622782&r2=1622783&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/vector_string_concat.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/vector_string_concat.q.out Fri Sep  5 20:16:08 2014
@@ -117,14 +117,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: over1korc
-          Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
           Select Operator
            expressions: s (type: string), concat(concat('      ', s), '      ') (type: string), concat(concat('|', rtrim(concat(concat('      ', s), '      '))), '|') (type: string)
             outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
             Limit
               Number of rows: 20
-              Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE
               ListSink
 
 PREHOOK: query: SELECT s AS `string`,

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_short_regress.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_short_regress.q.out?rev=1622783&r1=1622782&r2=1622783&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_short_regress.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_short_regress.q.out Fri Sep  5 20:16:08 2014
@@ -924,14 +924,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: alltypesorc
-          Statistics: Num rows: 1193 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
           Filter Operator
             predicate: (((((cstring1 rlike 'a.*') and (cstring2 like '%ss%')) or ((1 <> cboolean2) and ((csmallint < 79.553) and (-257 <> ctinyint)))) or ((cdouble > ctinyint) and (cfloat >= cint))) or ((cint < cbigint) and (ctinyint > cbigint))) (type: boolean)
-            Statistics: Num rows: 959 Data size: 303244 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: cint (type: int), cdouble (type: double), ctimestamp2 (type: timestamp), cstring1 (type: string), cboolean2 (type: boolean), ctinyint (type: tinyint), cfloat (type: float), ctimestamp1 (type: timestamp), csmallint (type: smallint), cbigint (type: bigint), (-3728 * cbigint) (type: bigint), (- cint) (type: int), (-863.257 - cint) (type: double), (- csmallint) (type: smallint), (csmallint - (- csmallint)) (type: smallint), ((csmallint - (- csmallint)) + (- csmallint)) (type: smallint), (cint / cint) (type: double), ((-863.257 - cint) - -26.28) (type: double), (- cfloat) (type: float), (cdouble * -89010) (type: double), (ctinyint / 988888) (type: double), (- ctinyint) (type: tinyint), (79.553 / ctinyint) (type: double)
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22
-              Statistics: Num rows: 959 Data size: 303244 Basic stats: COMPLETE Column stats: NONE
               ListSink
 
 PREHOOK: query: SELECT cint,
@@ -2317,14 +2314,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: alltypesorc
-          Statistics: Num rows: 1347 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
           Filter Operator
             predicate: (((((197.0 > ctinyint) and (cint = cbigint)) or (cbigint = 359)) or (cboolean1 < 0)) or ((cstring1 like '%ss') and (cfloat <= ctinyint))) (type: boolean)
-            Statistics: Num rows: 1347 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean), cfloat (type: float), cdouble (type: double), ctimestamp2 (type: timestamp), csmallint (type: smallint), cstring2 (type: string), cboolean2 (type: boolean), (cint / cbigint) (type: double), (cbigint % 79.553) (type: double), (- (cint / cbigint)) (type: double), (10.175 % cfloat) (type: double), (- cfloat) (type: float), (cfloat - (- cfloat)) (type: float), ((cfloat - (- cfloat)) % -6432) (type: float), (cdouble * csmallint) (type: double), (- cdouble) (type: double), (- cbigint) (type: bigint), (cfloat - (cint / cbigint)) (type: double), (- csmallint) (type: smallint), (3569 % cbigint) (type: bigint), (359 - cdouble) (type: double), (- csmallint) (type: smallint)
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
-              Statistics: Num rows: 1347 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
               ListSink
 
 PREHOOK: query: SELECT cint,

Modified: hive/trunk/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
URL: http://svn.apache.org/viewvc/hive/trunk/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java?rev=1622783&r1=1622782&r2=1622783&view=diff
==============================================================================
--- hive/trunk/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java (original)
+++ hive/trunk/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java Fri Sep  5 20:16:08 2014
@@ -302,6 +302,7 @@ public class Hadoop23Shims extends Hadoo
 
       mr = new MiniTezCluster("hive", numberOfTaskTrackers);
       conf.set("fs.defaultFS", nameNode);
+      conf.set("tez.am.log.level", "DEBUG");
       conf.set(MRJobConfig.MR_AM_STAGING_DIR, "/apps_staging_dir");
       mr.init(conf);
       mr.start();


