hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From g...@apache.org
Subject hadoop git commit: HADOOP-11506. Configuration variable expansion regex expensive for long values. (Gera Shegalov via gera)
Date Fri, 06 Feb 2015 08:33:23 GMT
Repository: hadoop
Updated Branches:
  refs/heads/trunk 02f154a00 -> 644548f20


HADOOP-11506. Configuration variable expansion regex expensive for long values. (Gera Shegalov
via gera)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/644548f2
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/644548f2
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/644548f2

Branch: refs/heads/trunk
Commit: 644548f201743408904dfe24b9f5b515b2c96713
Parents: 02f154a
Author: Gera Shegalov <gera@apache.org>
Authored: Fri Jan 30 23:18:03 2015 -0800
Committer: Gera Shegalov <gera@apache.org>
Committed: Fri Feb 6 00:27:54 2015 -0800

----------------------------------------------------------------------
 hadoop-common-project/hadoop-common/CHANGES.txt |   3 +
 .../org/apache/hadoop/conf/Configuration.java   | 124 ++++++++++++++++---
 .../apache/hadoop/conf/TestConfiguration.java   |  54 +++++++-
 3 files changed, 159 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/644548f2/hadoop-common-project/hadoop-common/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 44adf7f..66c2cba 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -573,6 +573,9 @@ Release 2.7.0 - UNRELEASED
     HADOOP-11188. hadoop-azure: automatically expand page blobs when they become
     full. (Eric Hanson via cnauroth)
 
+    HADOOP-11506. Configuration variable expansion regex expensive for long
+    values. (Gera Shegalov via gera)
+
   BUG FIXES
 
     HADOOP-11488. Difference in default connection timeout for S3A FS

http://git-wip-us.apache.org/repos/asf/hadoop/blob/644548f2/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
index 8f98d0a..ea0d3a6 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
@@ -187,7 +187,7 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
     "testingforemptydefaultvalue";
 
   private boolean allowNullValueProperties = false;
-  
+
   private static class Resource {
     private final Object resource;
     private final String name;
@@ -845,31 +845,106 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
     resources.add(resource);                      // add to resources
     reloadConfiguration();
   }
-  
-  private static final Pattern VAR_PATTERN =
-      Pattern.compile("\\$\\{[^\\}\\$\u0020]+\\}");
 
   private static final int MAX_SUBST = 20;
 
+  private static final int SUB_START_IDX = 0;
+  private static final int SUB_END_IDX = SUB_START_IDX + 1;
+
+  /**
+   * This is a manual implementation of the following regex
+   * "\\$\\{[^\\}\\$\u0020]+\\}". It can be 15x more efficient than
+   * a regex matcher as demonstrated by HADOOP-11506. This is noticeable with
+   * Hadoop apps building on the assumption Configuration#get is an O(1)
+   * hash table lookup, especially when the eval is a long string.
+   *
+   * @param eval a string that may contain variables requiring expansion.
+   * @return a 2-element int array res such that
+   * eval.substring(res[0], res[1]) is "var" for the left-most occurrence of
+   * ${var} in eval. If no variable is found -1, -1 is returned.
+   */
+  private static int[] findSubVariable(String eval) {
+    int[] result = {-1, -1};
+
+    int matchStart;
+    int leftBrace;
+
+    // scanning for a brace first because it's less frequent than $
+    // that can occur in nested class names
+    //
+    match_loop:
+    for (matchStart = 1, leftBrace = eval.indexOf('{', matchStart);
+         // minimum left brace position (follows '$')
+         leftBrace > 0
+         // right brace of a smallest valid expression "${c}"
+         && leftBrace + "{c".length() < eval.length();
+         leftBrace = eval.indexOf('{', matchStart)) {
+      int matchedLen = 0;
+      if (eval.charAt(leftBrace - 1) == '$') {
+        int subStart = leftBrace + 1; // after '{'
+        for (int i = subStart; i < eval.length(); i++) {
+          switch (eval.charAt(i)) {
+            case '}':
+              if (matchedLen > 0) { // match
+                result[SUB_START_IDX] = subStart;
+                result[SUB_END_IDX] = subStart + matchedLen;
+                break match_loop;
+              }
+              // fall through to skip 1 char
+            case ' ':
+            case '$':
+              matchStart = i + 1;
+              continue match_loop;
+            default:
+              matchedLen++;
+          }
+        }
+        // scanned from "${"  to the end of eval, and no reset via ' ', '$':
+        //    no match!
+        break match_loop;
+      } else {
+        // not a start of a variable
+        //
+        matchStart = leftBrace + 1;
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Attempts to repeatedly expand the value {@code expr} by replacing the
+   * left-most substring of the form "${var}" in the following precedence order
+   * <ol>
+   *   <li>by the value of the Java system property "var" if defined</li>
+   *   <li>by the value of the configuration key "var" if defined</li>
+   * </ol>
+   *
+   * If var is unbounded the current state of expansion "prefix${var}suffix" is
+   * returned.
+   *
+   * If a cycle is detected: replacing var1 requires replacing var2 ... requires
+   * replacing var1, i.e., the cycle is shorter than
+   * {@link Configuration#MAX_SUBST} then the original expr is returned.
+   *
+   * @param expr the literal value of a config key
+   * @return null if expr is null, otherwise the value resulting from expanding
+   * expr using the algorithm above.
+   * @throws IllegalArgumentException when more than
+   * {@link Configuration#MAX_SUBST} replacements are required
+   */
   private String substituteVars(String expr) {
     if (expr == null) {
       return null;
     }
-    Matcher match = VAR_PATTERN.matcher("");
     String eval = expr;
-    Set<String> evalSet = new HashSet<String>();
-    for(int s=0; s<MAX_SUBST; s++) {
-      if (evalSet.contains(eval)) {
-        // Cyclic resolution pattern detected. Return current expression.
+    Set<String> evalSet = null;
+    for(int s = 0; s < MAX_SUBST; s++) {
+      final int[] varBounds = findSubVariable(eval);
+      if (varBounds[SUB_START_IDX] == -1) {
         return eval;
       }
-      evalSet.add(eval);
-      match.reset(eval);
-      if (!match.find()) {
-        return eval;
-      }
-      String var = match.group();
-      var = var.substring(2, var.length()-1); // remove ${ .. }
+      final String var = eval.substring(varBounds[SUB_START_IDX],
+          varBounds[SUB_END_IDX]);
       String val = null;
       try {
         val = System.getProperty(var);
@@ -882,8 +957,23 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
       if (val == null) {
         return eval; // return literal ${var}: var is unbound
       }
+
+      // prevent recursive resolution
+      //
+      final int dollar = varBounds[SUB_START_IDX] - "${".length();
+      final int afterRightBrace = varBounds[SUB_END_IDX] + "}".length();
+      final String refVar = eval.substring(dollar, afterRightBrace);
+      if (evalSet == null) {
+        evalSet = new HashSet<String>();
+      }
+      if (!evalSet.add(refVar)) {
+        return expr; // return original expression if there is a loop
+      }
+
       // substitute
-      eval = eval.substring(0, match.start())+val+eval.substring(match.end());
+      eval = eval.substring(0, dollar)
+             + val
+             + eval.substring(afterRightBrace);
     }
     throw new IllegalStateException("Variable substitution depth too large: " 
                                     + MAX_SUBST + " " + expr);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/644548f2/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
index 7b4fbb5..b84045d 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
@@ -1271,12 +1271,56 @@ public class TestConfiguration extends TestCase {
   }
 
   public void testInvalidSubstitutation() {
+    final Configuration configuration = new Configuration(false);
+
+    // 2-var loops
+    //
+    final String key = "test.random.key";
+    for (String keyExpression : Arrays.asList(
+        "${" + key + "}",
+        "foo${" + key + "}",
+        "foo${" + key + "}bar",
+        "${" + key + "}bar")) {
+      configuration.set(key, keyExpression);
+      assertEquals("Unexpected value", keyExpression, configuration.get(key));
+    }
+
+    //
+    // 3-variable loops
+    //
+
+    final String expVal1 = "${test.var2}";
+    String testVar1 = "test.var1";
+    configuration.set(testVar1, expVal1);
+    configuration.set("test.var2", "${test.var3}");
+    configuration.set("test.var3", "${test.var1}");
+    assertEquals("Unexpected value", expVal1, configuration.get(testVar1));
+
+    // 3-variable loop with non-empty value prefix/suffix
+    //
+    final String expVal2 = "foo2${test.var2}bar2";
+    configuration.set(testVar1, expVal2);
+    configuration.set("test.var2", "foo3${test.var3}bar3");
+    configuration.set("test.var3", "foo1${test.var1}bar1");
+    assertEquals("Unexpected value", expVal2, configuration.get(testVar1));
+  }
+
+  public void testIncompleteSubbing() {
+    Configuration configuration = new Configuration(false);
     String key = "test.random.key";
-    String keyExpression = "${" + key + "}";
-    Configuration configuration = new Configuration();
-    configuration.set(key, keyExpression);
-    String value = configuration.get(key);
-    assertTrue("Unexpected value " + value, value.equals(keyExpression));
+    for (String keyExpression : Arrays.asList(
+        "{}",
+        "${}",
+        "{" + key,
+        "${" + key,
+        "foo${" + key,
+        "foo${" + key + "bar",
+        "foo{" + key + "}bar",
+        "${" + key + "bar")) {
+      configuration.set(key, keyExpression);
+      String value = configuration.get(key);
+      assertTrue("Unexpected value " + value, value.equals(keyExpression));
+    }
   }
 
   public void testBoolean() {


Mime
View raw message