crunch-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gr...@apache.org
Subject git commit: CRUNCH-456 Abbreviate long node names in dot file
Date Tue, 05 Aug 2014 15:31:19 GMT
Repository: crunch
Updated Branches:
  refs/heads/master 21965a6e2 -> 5d43171a1


CRUNCH-456 Abbreviate long node names in dot file

Abbreviate node names down to 300 characters in job plan dot files
to ensure that the output dot files are both valid and readable.


Project: http://git-wip-us.apache.org/repos/asf/crunch/repo
Commit: http://git-wip-us.apache.org/repos/asf/crunch/commit/5d43171a
Tree: http://git-wip-us.apache.org/repos/asf/crunch/tree/5d43171a
Diff: http://git-wip-us.apache.org/repos/asf/crunch/diff/5d43171a

Branch: refs/heads/master
Commit: 5d43171a140ddfe84aee602be473b1226e784770
Parents: 21965a6
Author: Gabriel Reid <greid@apache.org>
Authored: Tue Aug 5 08:51:58 2014 +0200
Committer: Gabriel Reid <greid@apache.org>
Committed: Tue Aug 5 17:30:01 2014 +0200

----------------------------------------------------------------------
 .../crunch/impl/mr/plan/DotfileWriter.java      | 54 ++++++++++++++------
 .../crunch/impl/mr/plan/DotfileWriterTest.java  | 22 ++++++--
 2 files changed, 57 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/crunch/blob/5d43171a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileWriter.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileWriter.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileWriter.java
index 4d88296..de96852 100644
--- a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileWriter.java
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileWriter.java
@@ -21,19 +21,19 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
-import org.apache.crunch.Pair;
-import org.apache.crunch.SourceTarget;
-import org.apache.crunch.Target;
-import org.apache.crunch.impl.dist.collect.PCollectionImpl;
-import org.apache.crunch.impl.mr.collect.InputCollection;
-import org.apache.crunch.impl.mr.collect.PGroupedTableImpl;
-
 import com.google.common.base.Joiner;
 import com.google.common.collect.HashMultimap;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
+import org.apache.commons.lang.StringUtils;
+import org.apache.crunch.Pair;
+import org.apache.crunch.SourceTarget;
+import org.apache.crunch.Target;
+import org.apache.crunch.impl.dist.collect.PCollectionImpl;
+import org.apache.crunch.impl.mr.collect.InputCollection;
+import org.apache.crunch.impl.mr.collect.PGroupedTableImpl;
 
 /**
  * Writes <a href="http://www.graphviz.org">Graphviz</a> dot files to illustrate
@@ -41,8 +41,11 @@ import com.google.common.collect.Sets;
  */
 public class DotfileWriter {
 
+  // Maximum length that a node name may have in the produced dot file
+  static final int MAX_NODE_NAME_LENGTH = 300;
+
   /** The types of tasks within a MapReduce job. */
-  enum MRTaskType { MAP, REDUCE };
+  enum MRTaskType { MAP, REDUCE }
 
   private Set<JobPrototype> jobPrototypes = Sets.newHashSet();
   private HashMultimap<Pair<JobPrototype, MRTaskType>, String> jobNodeDeclarations = HashMultimap.create();
@@ -61,7 +64,9 @@ public class DotfileWriter {
     if (pcollectionImpl instanceof InputCollection) {
       shape = "folder";
     }
-    return String.format("%s [label=\"%s\" shape=%s];", formatPCollection(pcollectionImpl, jobPrototype), pcollectionImpl.getName(),
+    return String.format("%s [label=\"%s\" shape=%s];",
+        formatPCollection(pcollectionImpl, jobPrototype),
+        limitNodeNameLength(pcollectionImpl.getName()),
         shape);
   }
 
@@ -72,7 +77,8 @@ public class DotfileWriter {
    * @return The global node declaration for the Target
    */
   String formatTargetNodeDeclaration(Target target) {
-    return String.format("\"%s\" [label=\"%s\" shape=folder];", target.toString(), target.toString());
+    String nodeName = limitNodeNameLength(target.toString());
+    return String.format("\"%s\" [label=\"%s\" shape=folder];", nodeName, nodeName);
   }
 
   /**
@@ -85,9 +91,11 @@ public class DotfileWriter {
   String formatPCollection(PCollectionImpl<?> pcollectionImpl, JobPrototype jobPrototype) {
     if (pcollectionImpl instanceof InputCollection) {
       InputCollection<?> inputCollection = (InputCollection<?>) pcollectionImpl;
-      return String.format("\"%s\"", inputCollection.getSource());
+      return String.format("\"%s\"", limitNodeNameLength(inputCollection.getSource().toString()));
     }
-    return String.format("\"%s@%d@%d\"", pcollectionImpl.getName(), pcollectionImpl.hashCode(), jobPrototype.hashCode());
+    return String.format("\"%s\"",
+        limitNodeNameLength(
+            String.format("%s@%d@%d", pcollectionImpl.getName(), pcollectionImpl.hashCode(), jobPrototype.hashCode())));
   }
 
   /**
@@ -97,7 +105,23 @@ public class DotfileWriter {
    * @return The dot-formatted chain of nodes
    */
   String formatNodeCollection(List<String> nodeCollection) {
-    return formatNodeCollection(nodeCollection, ImmutableMap.<String,String>of());
+    return formatNodeCollection(nodeCollection, ImmutableMap.<String, String>of());
+  }
+
+  /**
+   * Limit a node name length down to {@link #MAX_NODE_NAME_LENGTH}, to ensure valid (and readable) dot files. If the
+   * name is already less than or equal to the maximum length, it will be returned untouched.
+   *
+   * @param nodeName node name to be limited in length
+   * @return the abbreviated node name if it was longer than the given maximum allowable length
+   */
+  static String limitNodeNameLength(String nodeName) {
+    if (nodeName.length() <= MAX_NODE_NAME_LENGTH) {
+      return nodeName;
+    }
+    String hashString = Integer.toString(nodeName.hashCode());
+    return String.format("%s@%s",
+        StringUtils.abbreviate(nodeName, MAX_NODE_NAME_LENGTH - (hashString.length() + 1)), hashString);
   }
 
   /**
@@ -140,7 +164,7 @@ public class DotfileWriter {
         String toNode = formatPCollection(pcollection, jobPrototype);
         for(Target target : targetDeps) {
           globalNodeDeclarations.add(formatTargetNodeDeclaration(target));
-          String fromNode = String.format("\"%s\"", target.toString());
+          String fromNode = String.format("\"%s\"", limitNodeNameLength(target.toString()));
           formattedNodePaths.add(
             formatNodeCollection(
               ImmutableList.of(fromNode, toNode),
@@ -210,7 +234,7 @@ public class DotfileWriter {
           addNodePathChain(nodePath, jobPrototype);
           nodePathChains.add(formatNodeCollection(
               Lists.newArrayList(formatPCollection(nodePath.descendingIterator().next(), jobPrototype),
-                  String.format("\"%s\"", target.toString()))));
+                  String.format("\"%s\"", limitNodeNameLength(target.toString())))));
         }
       }
     }

http://git-wip-us.apache.org/repos/asf/crunch/blob/5d43171a/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/DotfileWriterTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/DotfileWriterTest.java b/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/DotfileWriterTest.java
index 4b183ac..239da53 100644
--- a/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/DotfileWriterTest.java
+++ b/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/DotfileWriterTest.java
@@ -18,11 +18,16 @@
 package org.apache.crunch.impl.mr.plan;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
 
 import java.util.List;
 
+import com.google.common.base.Strings;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
 import org.apache.crunch.ParallelDoOptions;
 import org.apache.crunch.Source;
 import org.apache.crunch.SourceTarget;
@@ -34,10 +39,6 @@ import org.junit.Before;
 import org.junit.Test;
 import org.mockito.Mockito;
 
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Lists;
-
 public class DotfileWriterTest {
 
   private DotfileWriter dotfileWriter;
@@ -170,4 +171,17 @@ public class DotfileWriterTest {
     assertEquals("label = Reduce; color = red;", dotfileWriter.getTaskGraphAttributes(MRTaskType.REDUCE));
   }
 
+  @Test
+  public void testLimitNodeNameLength_AlreadyWithinLimit() {
+    String nodeName = "within_limit";
+    assertEquals(nodeName, DotfileWriter.limitNodeNameLength(nodeName));
+  }
+
+  @Test
+  public void testLimitNodeNameLength_OverLimit() {
+    String nodeName = Strings.repeat("x", DotfileWriter.MAX_NODE_NAME_LENGTH + 1);
+    String abbreviated = DotfileWriter.limitNodeNameLength(nodeName);
+    assertEquals(DotfileWriter.MAX_NODE_NAME_LENGTH, abbreviated.length());
+    assertTrue(abbreviated.startsWith("xxxxx"));
+  }
 }


Mime
View raw message