crunch-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jwi...@apache.org
Subject git commit: CRUNCH-458: Eliminate random split decisions with TreeSets/TreeMap
Date Tue, 12 Aug 2014 16:12:03 GMT
Repository: crunch
Updated Branches:
  refs/heads/apache-crunch-0.8 ee7838408 -> 54df53c1d


CRUNCH-458: Eliminate random split decisions with TreeSets/TreeMap


Project: http://git-wip-us.apache.org/repos/asf/crunch/repo
Commit: http://git-wip-us.apache.org/repos/asf/crunch/commit/54df53c1
Tree: http://git-wip-us.apache.org/repos/asf/crunch/tree/54df53c1
Diff: http://git-wip-us.apache.org/repos/asf/crunch/diff/54df53c1

Branch: refs/heads/apache-crunch-0.8
Commit: 54df53c1d6fdd4759351852a04f8561471ec5a94
Parents: ee78384
Author: Josh Wills <jwills@apache.org>
Authored: Wed Aug 6 18:12:19 2014 -0700
Committer: Josh Wills <jwills@apache.org>
Committed: Tue Aug 12 09:09:46 2014 -0700

----------------------------------------------------------------------
 .../org/apache/crunch/impl/mr/plan/Edge.java    | 30 ++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/crunch/blob/54df53c1/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/Edge.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/Edge.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/Edge.java
index 4006930..111905c 100644
--- a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/Edge.java
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/Edge.java
@@ -18,6 +18,7 @@
 package org.apache.crunch.impl.mr.plan;
 
 import java.util.Collection;
+import java.util.Comparator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -42,7 +43,7 @@ class Edge {
   Edge(Vertex head, Vertex tail) {
     this.head = head;
     this.tail = tail;
-    this.paths = Sets.newHashSet();
+    this.paths = Sets.newTreeSet(NODE_CMP);
   }
   
   public Vertex getHead() {
@@ -68,7 +69,7 @@ class Edge {
   public Map<NodePath,  PCollectionImpl> getSplitPoints(boolean breakpointsOnly) {
     List<NodePath> np = Lists.newArrayList(paths);
     List<PCollectionImpl<?>> smallestOverallPerPath = Lists.newArrayListWithExpectedSize(np.size());
-    Map<PCollectionImpl<?>, Set<Integer>> pathCounts = Maps.newHashMap();
+    Map<PCollectionImpl<?>, Set<Integer>> pathCounts = Maps.newTreeMap(PCOL_CMP);
     Map<NodePath, PCollectionImpl> splitPoints = Maps.newHashMap();
     for (int i = 0; i < np.size(); i++) {
       long bestSize = Long.MAX_VALUE;
@@ -165,4 +166,29 @@ class Edge {
   public String toString() {
     return ReflectionToStringBuilder.toString(this, ToStringStyle.SHORT_PREFIX_STYLE);
   }
+
+  private static Comparator<NodePath> NODE_CMP = new Comparator<NodePath>() {
+    @Override
+    public int compare(NodePath left, NodePath right) {
+      if (left == right || left.equals(right)) {
+        return 0;
+      }
+      return left.toString().compareTo(right.toString());
+    }
+  };
+
+  private static Comparator<PCollectionImpl<?>> PCOL_CMP = new Comparator<PCollectionImpl<?>>() {
+    @Override
+    public int compare(PCollectionImpl<?> left, PCollectionImpl<?> right) {
+      if (left == right || left.equals(right)) {
+        return 0;
+      }
+      String leftName = left.getName();
+      String rightName = right.getName();
+      if (leftName == null || rightName == null || leftName.equals(rightName)) {
+        return left.hashCode() < right.hashCode() ? -1 : 1;
+      }
+      return leftName.compareTo(rightName);
+    }
+  };
 }


Mime
View raw message