crunch-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gr...@apache.org
Subject git commit: CRUNCH-456 Abbreviate long node names in dot file
Date Tue, 05 Aug 2014 15:29:53 GMT
Repository: crunch
Updated Branches:
  refs/heads/apache-crunch-0.8 a5c592768 -> 3a760cdae


CRUNCH-456 Abbreviate long node names in dot file

Abbreviate node names down to 300 characters in job plan dot files
to ensure that the output dot files are both valid and readable.


Project: http://git-wip-us.apache.org/repos/asf/crunch/repo
Commit: http://git-wip-us.apache.org/repos/asf/crunch/commit/3a760cda
Tree: http://git-wip-us.apache.org/repos/asf/crunch/tree/3a760cda
Diff: http://git-wip-us.apache.org/repos/asf/crunch/diff/3a760cda

Branch: refs/heads/apache-crunch-0.8
Commit: 3a760cdae913943449083d2863f7bcb40572a7cb
Parents: a5c5927
Author: Gabriel Reid <greid@apache.org>
Authored: Tue Aug 5 08:51:58 2014 +0200
Committer: Gabriel Reid <greid@apache.org>
Committed: Tue Aug 5 17:26:00 2014 +0200

----------------------------------------------------------------------
 .../crunch/impl/mr/plan/DotfileWriter.java      | 54 ++++++++++++++------
 .../crunch/impl/mr/plan/DotfileWriterTest.java  | 22 ++++++--
 2 files changed, 57 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/crunch/blob/3a760cda/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileWriter.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileWriter.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileWriter.java
index 4d88296..de96852 100644
--- a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileWriter.java
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileWriter.java
@@ -21,19 +21,19 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
-import org.apache.crunch.Pair;
-import org.apache.crunch.SourceTarget;
-import org.apache.crunch.Target;
-import org.apache.crunch.impl.dist.collect.PCollectionImpl;
-import org.apache.crunch.impl.mr.collect.InputCollection;
-import org.apache.crunch.impl.mr.collect.PGroupedTableImpl;
-
 import com.google.common.base.Joiner;
 import com.google.common.collect.HashMultimap;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
+import org.apache.commons.lang.StringUtils;
+import org.apache.crunch.Pair;
+import org.apache.crunch.SourceTarget;
+import org.apache.crunch.Target;
+import org.apache.crunch.impl.dist.collect.PCollectionImpl;
+import org.apache.crunch.impl.mr.collect.InputCollection;
+import org.apache.crunch.impl.mr.collect.PGroupedTableImpl;
 
 /**
  * Writes <a href="http://www.graphviz.org">Graphviz</a> dot files to illustrate
@@ -41,8 +41,11 @@ import com.google.common.collect.Sets;
  */
 public class DotfileWriter {
 
+  // Maximum length that a node name may have in the produced dot file
+  static final int MAX_NODE_NAME_LENGTH = 300;
+
   /** The types of tasks within a MapReduce job. */
-  enum MRTaskType { MAP, REDUCE };
+  enum MRTaskType { MAP, REDUCE }
 
   private Set<JobPrototype> jobPrototypes = Sets.newHashSet();
   private HashMultimap<Pair<JobPrototype, MRTaskType>, String> jobNodeDeclarations = HashMultimap.create();
@@ -61,7 +64,9 @@ public class DotfileWriter {
     if (pcollectionImpl instanceof InputCollection) {
       shape = "folder";
     }
-    return String.format("%s [label=\"%s\" shape=%s];", formatPCollection(pcollectionImpl, jobPrototype), pcollectionImpl.getName(),
+    return String.format("%s [label=\"%s\" shape=%s];",
+        formatPCollection(pcollectionImpl, jobPrototype),
+        limitNodeNameLength(pcollectionImpl.getName()),
         shape);
   }
 
@@ -72,7 +77,8 @@ public class DotfileWriter {
    * @return The global node declaration for the Target
    */
   String formatTargetNodeDeclaration(Target target) {
-    return String.format("\"%s\" [label=\"%s\" shape=folder];", target.toString(), target.toString());
+    String nodeName = limitNodeNameLength(target.toString());
+    return String.format("\"%s\" [label=\"%s\" shape=folder];", nodeName, nodeName);
   }
 
   /**
@@ -85,9 +91,11 @@ public class DotfileWriter {
   String formatPCollection(PCollectionImpl<?> pcollectionImpl, JobPrototype jobPrototype) {
     if (pcollectionImpl instanceof InputCollection) {
       InputCollection<?> inputCollection = (InputCollection<?>) pcollectionImpl;
-      return String.format("\"%s\"", inputCollection.getSource());
+      return String.format("\"%s\"", limitNodeNameLength(inputCollection.getSource().toString()));
     }
-    return String.format("\"%s@%d@%d\"", pcollectionImpl.getName(), pcollectionImpl.hashCode(), jobPrototype.hashCode());
+    return String.format("\"%s\"",
+        limitNodeNameLength(
+            String.format("%s@%d@%d", pcollectionImpl.getName(), pcollectionImpl.hashCode(), jobPrototype.hashCode())));
   }
 
   /**
@@ -97,7 +105,23 @@ public class DotfileWriter {
    * @return The dot-formatted chain of nodes
    */
   String formatNodeCollection(List<String> nodeCollection) {
-    return formatNodeCollection(nodeCollection, ImmutableMap.<String,String>of());
+    return formatNodeCollection(nodeCollection, ImmutableMap.<String, String>of());
+  }
+
+  /**
+   * Limit a node name length down to {@link #MAX_NODE_NAME_LENGTH}, to ensure valid (and readable) dot files. If the
+   * name is already less than or equal to the maximum length, it will be returned untouched.
+   *
+   * @param nodeName node name to be limited in length
+   * @return the abbreviated node name if it was longer than the given maximum allowable length
+   */
+  static String limitNodeNameLength(String nodeName) {
+    if (nodeName.length() <= MAX_NODE_NAME_LENGTH) {
+      return nodeName;
+    }
+    String hashString = Integer.toString(nodeName.hashCode());
+    return String.format("%s@%s",
+        StringUtils.abbreviate(nodeName, MAX_NODE_NAME_LENGTH - (hashString.length() + 1)), hashString);
   }
 
   /**
@@ -140,7 +164,7 @@ public class DotfileWriter {
         String toNode = formatPCollection(pcollection, jobPrototype);
         for(Target target : targetDeps) {
           globalNodeDeclarations.add(formatTargetNodeDeclaration(target));
-          String fromNode = String.format("\"%s\"", target.toString());
+          String fromNode = String.format("\"%s\"", limitNodeNameLength(target.toString()));
           formattedNodePaths.add(
             formatNodeCollection(
               ImmutableList.of(fromNode, toNode),
@@ -210,7 +234,7 @@ public class DotfileWriter {
           addNodePathChain(nodePath, jobPrototype);
           nodePathChains.add(formatNodeCollection(
               Lists.newArrayList(formatPCollection(nodePath.descendingIterator().next(), jobPrototype),
-                  String.format("\"%s\"", target.toString()))));
+                  String.format("\"%s\"", limitNodeNameLength(target.toString())))));
         }
       }
     }

http://git-wip-us.apache.org/repos/asf/crunch/blob/3a760cda/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/DotfileWriterTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/DotfileWriterTest.java b/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/DotfileWriterTest.java
index 4b183ac..239da53 100644
--- a/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/DotfileWriterTest.java
+++ b/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/DotfileWriterTest.java
@@ -18,11 +18,16 @@
 package org.apache.crunch.impl.mr.plan;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
 
 import java.util.List;
 
+import com.google.common.base.Strings;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
 import org.apache.crunch.ParallelDoOptions;
 import org.apache.crunch.Source;
 import org.apache.crunch.SourceTarget;
@@ -34,10 +39,6 @@ import org.junit.Before;
 import org.junit.Test;
 import org.mockito.Mockito;
 
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Lists;
-
 public class DotfileWriterTest {
 
   private DotfileWriter dotfileWriter;
@@ -170,4 +171,17 @@ public class DotfileWriterTest {
     assertEquals("label = Reduce; color = red;", dotfileWriter.getTaskGraphAttributes(MRTaskType.REDUCE));
   }
 
+  @Test
+  public void testLimitNodeNameLength_AlreadyWithinLimit() {
+    String nodeName = "within_limit";
+    assertEquals(nodeName, DotfileWriter.limitNodeNameLength(nodeName));
+  }
+
+  @Test
+  public void testLimitNodeNameLength_OverLimit() {
+    String nodeName = Strings.repeat("x", DotfileWriter.MAX_NODE_NAME_LENGTH + 1);
+    String abbreviated = DotfileWriter.limitNodeNameLength(nodeName);
+    assertEquals(DotfileWriter.MAX_NODE_NAME_LENGTH, abbreviated.length());
+    assertTrue(abbreviated.startsWith("xxxxx"));
+  }
 }


Mime
View raw message