tez-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hit...@apache.org
Subject tez git commit: TEZ-2190. TestOrderedWordCount fails when generateSplitsInClient set to true. (hitesh)
Date Wed, 11 Mar 2015 01:48:19 GMT
Repository: tez
Updated Branches:
  refs/heads/branch-0.6 b68e698fa -> 200b42b7d


TEZ-2190. TestOrderedWordCount fails when generateSplitsInClient set to true. (hitesh)

(cherry picked from commit 35a2f3c47653f0556d72cb45dbc48abfe4a8d5bd)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/200b42b7
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/200b42b7
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/200b42b7

Branch: refs/heads/branch-0.6
Commit: 200b42b7da3cffdeb0ef2d426981a8a34e0b1007
Parents: b68e698
Author: Hitesh Shah <hitesh@apache.org>
Authored: Tue Mar 10 18:46:20 2015 -0700
Committer: Hitesh Shah <hitesh@apache.org>
Committed: Tue Mar 10 18:48:31 2015 -0700

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../main/java/org/apache/tez/dag/api/DAG.java   | 18 +++++++++++----
 .../org/apache/tez/dag/api/TestDAGVerify.java   | 24 ++++++++++++++++++++
 3 files changed, 39 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/200b42b7/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 94ba1a7..2a05b0a 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -6,6 +6,7 @@ Release 0.6.1: Unreleased
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-2190. TestOrderedWordCount fails when generateSplitsInClient set to true.
   TEZ-2091. Add support for hosting TEZ_UI with nodejs.
   TEZ-2165. Tez UI: DAG shows running status if killed by RM in some cases.
   TEZ-2158. TEZ UI: Display dag/vertex names, and task/attempt index in breadcrumb.

http://git-wip-us.apache.org/repos/asf/tez/blob/200b42b7/tez-api/src/main/java/org/apache/tez/dag/api/DAG.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/DAG.java b/tez-api/src/main/java/org/apache/tez/dag/api/DAG.java
index 4b4294a..ebdcb79 100644
--- a/tez-api/src/main/java/org/apache/tez/dag/api/DAG.java
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/DAG.java
@@ -383,17 +383,27 @@ public class DAG {
     // 3. has custom vertex manager
     for (Vertex vertex : vertices.values()) {
       if (vertex.getParallelism() == -1) {
-        boolean hasInputInititlaizer = false;
-        if (vertex.getDataSources()!= null && !vertex.getDataSources().isEmpty()) {
+        boolean hasInputInitializer = false;
+        if (vertex.getDataSources() != null && !vertex.getDataSources().isEmpty()) {
           for (DataSourceDescriptor ds : vertex.getDataSources()) {
             if (ds.getInputInitializerDescriptor() != null) {
-              hasInputInititlaizer = true;
+              hasInputInitializer = true;
               break;
             }
           }
         }
-        if (hasInputInititlaizer) {
+        if (hasInputInitializer) {
           continue;
+        } else {
+          // Account for the case where the vertex has a data source with a determined number of
+          // shards e.g. splits calculated on the client and not in the AM
+          // In this case, vertex parallelism is setup later using the data source's numShards
+          // and as a result, an initializer is not needed.
+          if (vertex.getDataSources() != null
+              && vertex.getDataSources().size() == 1
+              &&  vertex.getDataSources().get(0).getNumberOfShards() > -1) {
+            continue;
+          }
         }
 
         boolean has1to1UninitedSources = false;

http://git-wip-us.apache.org/repos/asf/tez/blob/200b42b7/tez-api/src/test/java/org/apache/tez/dag/api/TestDAGVerify.java
----------------------------------------------------------------------
diff --git a/tez-api/src/test/java/org/apache/tez/dag/api/TestDAGVerify.java b/tez-api/src/test/java/org/apache/tez/dag/api/TestDAGVerify.java
index 4793c25..3f8bc93 100644
--- a/tez-api/src/test/java/org/apache/tez/dag/api/TestDAGVerify.java
+++ b/tez-api/src/test/java/org/apache/tez/dag/api/TestDAGVerify.java
@@ -1152,4 +1152,28 @@ public class TestDAGVerify {
         InputDescriptor.create(dummyInputClassName))));
     dag.verify();
   }
+
+  @Test
+  public void testDAGWithSplitsOnClient() {
+    DAG dag = DAG.create("testDag");
+
+    // Mimic map which has a data source and shards set when splits are generated in the client
+    Vertex v1 = Vertex.create("v1", ProcessorDescriptor.create(dummyProcessorClassName));
+    v1.addDataSource("input", DataSourceDescriptor.create(
+        InputDescriptor.create(dummyInputClassName), null, 10, null, null, null));
+    dag.addVertex(v1);
+
+    Vertex v2 = Vertex.create("v2", ProcessorDescriptor.create(dummyProcessorClassName));
+    dag.addVertex(v2);
+
+    dag.addEdge(Edge.create(v1, v2, EdgeProperty.create(
+        DataMovementType.ONE_TO_ONE,
+        DataSourceType.PERSISTED,
+        SchedulingType.SEQUENTIAL,
+        OutputDescriptor.create(dummyOutputClassName),
+        InputDescriptor.create(dummyInputClassName))));
+
+    dag.verify();
+  }
+
 }


Mime
View raw message