crunch-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mkw...@apache.org
Subject crunch git commit: CRUNCH-604: Avoid expensive Writables.reloadWritableComparableCodes
Date Wed, 17 Aug 2016 01:01:22 GMT
Repository: crunch
Updated Branches:
  refs/heads/master 597eadd65 -> 0b19717d1


CRUNCH-604: Avoid expensive Writables.reloadWritableComparableCodes


Project: http://git-wip-us.apache.org/repos/asf/crunch/repo
Commit: http://git-wip-us.apache.org/repos/asf/crunch/commit/0b19717d
Tree: http://git-wip-us.apache.org/repos/asf/crunch/tree/0b19717d
Diff: http://git-wip-us.apache.org/repos/asf/crunch/diff/0b19717d

Branch: refs/heads/master
Commit: 0b19717d105b58e58c1947eda6b673a387e330d0
Parents: 597eadd
Author: Micah Whitacre <mkwhit@gmail.com>
Authored: Tue Aug 2 16:29:55 2016 -0500
Committer: Micah Whitacre <mkwhit@gmail.com>
Committed: Tue Aug 2 16:29:55 2016 -0500

----------------------------------------------------------------------
 .../crunch/types/writable/TupleWritable.java      | 18 ++++++++++++++----
 .../apache/crunch/types/writable/Writables.java   | 17 +++++++++++------
 2 files changed, 25 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/crunch/blob/0b19717d/crunch-core/src/main/java/org/apache/crunch/types/writable/TupleWritable.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/writable/TupleWritable.java
b/crunch-core/src/main/java/org/apache/crunch/types/writable/TupleWritable.java
index 068b0af..c29ffa8 100644
--- a/crunch-core/src/main/java/org/apache/crunch/types/writable/TupleWritable.java
+++ b/crunch-core/src/main/java/org/apache/crunch/types/writable/TupleWritable.java
@@ -54,6 +54,8 @@ public class TupleWritable extends Configured implements WritableComparable<Tupl
   private int[] written;
   private Writable[] values;
 
+  private boolean comparablesLoaded = false;
+
   /**
    * Create an empty tuple with no allocated storage for writables.
    */
@@ -65,10 +67,18 @@ public class TupleWritable extends Configured implements WritableComparable<Tupl
     super.setConf(conf);
     if (conf == null) return;
 
-    try {
-      Writables.reloadWritableComparableCodes(conf);
-    } catch (Exception e) {
-      throw new CrunchRuntimeException("Error reloading writable comparable codes", e);
+    // only reload comparables if this particular writable hasn't already done so;
+    // While there's also some caching within the static `reloadWritableComparableCodes`,
+    // it has to hit the configuration, which runs some embarrassing regexes and stuff.
+    // As for why SequenceFile$Reader calls `setConf` every single time it reads a new value
+    // (thus trigering this expensive path), I don't know.
+    if (!comparablesLoaded) {
+      try {
+        Writables.reloadWritableComparableCodes(conf);
+      } catch (Exception e) {
+        throw new CrunchRuntimeException("Error reloading writable comparable codes", e);
+      }
+      comparablesLoaded = true;
     }
   }
 

http://git-wip-us.apache.org/repos/asf/crunch/blob/0b19717d/crunch-core/src/main/java/org/apache/crunch/types/writable/Writables.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/types/writable/Writables.java b/crunch-core/src/main/java/org/apache/crunch/types/writable/Writables.java
index 89d08ee..ca65972 100644
--- a/crunch-core/src/main/java/org/apache/crunch/types/writable/Writables.java
+++ b/crunch-core/src/main/java/org/apache/crunch/types/writable/Writables.java
@@ -129,6 +129,8 @@ public class Writables {
 
   private static final String WRITABLE_COMPARABLE_CODES = "crunch.writable.comparable.codes";
 
+  private static int WRITABLE_CODES_LOADED = 0;
+
   static void serializeWritableComparableCodes(Configuration conf) throws IOException {
     Map<Integer, String> codeToClassNameMap = Maps.transformValues(WRITABLE_CODES,
         new Function<Class<? extends Writable>, String>() {
@@ -142,12 +144,15 @@ public class Writables {
 
   static void reloadWritableComparableCodes(Configuration conf) throws Exception {
     if (conf.get(WRITABLE_COMPARABLE_CODES) != null) {
-      Map<String, String> codeToClassName = Splitter.on(';')
-          .withKeyValueSeparator(":").split(conf.get(WRITABLE_COMPARABLE_CODES));
-      for (Map.Entry<String, String> codeToClassNameEntry : codeToClassName.entrySet())
{
-        WRITABLE_CODES.put(
-            Integer.parseInt(codeToClassNameEntry.getKey()),
-            (Class<? extends Writable>) Class.forName(codeToClassNameEntry.getValue()));
+      String writableCodes = conf.get(WRITABLE_COMPARABLE_CODES);
+      if (writableCodes != null && writableCodes.hashCode() != WRITABLE_CODES_LOADED)
{
+        Map<String, String> codeToClassName = Splitter.on(';').withKeyValueSeparator(":").split(writableCodes);
+        for (Map.Entry<String, String> codeToClassNameEntry : codeToClassName.entrySet())
{
+          WRITABLE_CODES.put(
+                  Integer.parseInt(codeToClassNameEntry.getKey()),
+                  (Class<? extends Writable>) Class.forName(codeToClassNameEntry.getValue()));
+        }
+        WRITABLE_CODES_LOADED = writableCodes.hashCode();
       }
     }
   }


Mime
View raw message