lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sha...@apache.org
Subject [20/50] [abbrv] lucene-solr:feature/autoscaling: LUCENE-7914: Add a maximum recursion level in automaton recursive functions (Operations.isFinite and Operations.topsortState) to prevent large automaton to overflow the stack.
Date Tue, 08 Aug 2017 12:14:33 GMT
LUCENE-7914: Add a maximum recursion level in automaton recursive functions (Operations.isFinite
and Operations.topsortState) to prevent large automaton to overflow the stack.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/7dde7984
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/7dde7984
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/7dde7984

Branch: refs/heads/feature/autoscaling
Commit: 7dde798473d1a8640edafb41f28ad25d17f25a2d
Parents: d620326
Author: Jim Ferenczi <jimczi@apache.org>
Authored: Fri Aug 4 12:02:30 2017 +0200
Committer: Jim Ferenczi <jimczi@apache.org>
Committed: Fri Aug 4 12:02:48 2017 +0200

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  7 ++++-
 .../lucene/util/automaton/Operations.java       | 27 +++++++++++++++-----
 .../apache/lucene/util/automaton/RegExp.java    | 22 ++++++++--------
 .../lucene/util/automaton/TestOperations.java   | 26 +++++++++++++++++--
 .../lucene/util/automaton/TestRegExp.java       | 25 ++++++------------
 .../analyzing/AnalyzingSuggesterTest.java       |  3 +--
 6 files changed, 71 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7dde7984/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index d5cc9e8..c0f263c 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -14,7 +14,6 @@ Changes in Runtime Behavior
 
 
 ======================= Lucene 7.1.0 =======================
-(No Changes)
 
 Optimizations
 
@@ -22,6 +21,12 @@ Optimizations
   SortedSetDocValuesFacetCounts and others) builds its map (Robert
   Muir, Adrien Grand, Mike McCandless)
 
+Bug Fixes
+
+* LUCENE-7914: Add a maximum recursion level in automaton recursive
+  functions (Operations.isFinite and Operations.topsortState) to prevent
+  large automaton to overflow the stack (Robert Muir, Adrien Grand, Jim Ferenczi)
+
 ======================= Lucene 7.0.0 =======================
 
 New Features

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7dde7984/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
index b673a82..8ed1b12 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
@@ -58,6 +58,11 @@ final public class Operations {
    */
   public static final int DEFAULT_MAX_DETERMINIZED_STATES = 10000;
 
+  /**
+   * Maximum level of recursion allowed in recursive operations.
+   */
+  public static final int MAX_RECURSION_LEVEL = 1000;
+
   private Operations() {}
 
   /**
@@ -1018,7 +1023,7 @@ final public class Operations {
     if (a.getNumStates() == 0) {
       return true;
     }
-    return isFinite(new Transition(), a, 0, new BitSet(a.getNumStates()), new BitSet(a.getNumStates()));
+    return isFinite(new Transition(), a, 0, new BitSet(a.getNumStates()), new BitSet(a.getNumStates()),
0);
   }
   
   /**
@@ -1026,13 +1031,16 @@ final public class Operations {
    * there are never transitions to dead states.)
    */
   // TODO: not great that this is recursive... in theory a
-  // large automata could exceed java's stack
-  private static boolean isFinite(Transition scratch, Automaton a, int state, BitSet path,
BitSet visited) {
+  // large automata could exceed java's stack so the maximum level of recursion is bounded
to 1000
+  private static boolean isFinite(Transition scratch, Automaton a, int state, BitSet path,
BitSet visited, int level) {
+    if (level > MAX_RECURSION_LEVEL) {
+      throw new IllegalArgumentException("input automaton is too large: " +  level);
+    }
     path.set(state);
     int numTransitions = a.initTransition(state, scratch);
     for(int t=0;t<numTransitions;t++) {
       a.getTransition(state, t, scratch);
-      if (path.get(scratch.dest) || (!visited.get(scratch.dest) && !isFinite(scratch,
a, scratch.dest, path, visited))) {
+      if (path.get(scratch.dest) || (!visited.get(scratch.dest) && !isFinite(scratch,
a, scratch.dest, path, visited, level+1))) {
         return false;
       }
     }
@@ -1264,7 +1272,7 @@ final public class Operations {
     int numStates = a.getNumStates();
     int[] states = new int[numStates];
     final BitSet visited = new BitSet(numStates);
-    int upto = topoSortStatesRecurse(a, visited, states, 0, 0);
+    int upto = topoSortStatesRecurse(a, visited, states, 0, 0, 0);
 
     if (upto < states.length) {
       // There were dead states
@@ -1283,14 +1291,19 @@ final public class Operations {
     return states;
   }
 
-  private static int topoSortStatesRecurse(Automaton a, BitSet visited, int[] states, int
upto, int state) {
+  // TODO: not great that this is recursive... in theory a
+  // large automata could exceed java's stack so the maximum level of recursion is bounded
to 1000
+  private static int topoSortStatesRecurse(Automaton a, BitSet visited, int[] states, int
upto, int state, int level) {
+    if (level > MAX_RECURSION_LEVEL) {
+      throw new IllegalArgumentException("input automaton is too large: " + level);
+    }
     Transition t = new Transition();
     int count = a.initTransition(state, t);
     for (int i=0;i<count;i++) {
       a.getNextTransition(t);
       if (!visited.get(t.dest)) {
         visited.set(t.dest);
-        upto = topoSortStatesRecurse(a, visited, states, upto, t.dest);
+        upto = topoSortStatesRecurse(a, visited, states, upto, t.dest, level+1);
       }
     }
     states[upto] = state;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7dde7984/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java b/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
index a813879..a643ddb 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
@@ -542,19 +542,21 @@ public class RegExp {
         a = MinimizationOperations.minimize(a, maxDeterminizedStates);
         break;
       case REGEXP_REPEAT_MIN:
-        a = Operations.repeat(
-          exp1.toAutomatonInternal(automata, automaton_provider,
-            maxDeterminizedStates),
-          min);
+        a = exp1.toAutomatonInternal(automata, automaton_provider, maxDeterminizedStates);
+        int minNumStates = (a.getNumStates() - 1) * min;
+        if (minNumStates > maxDeterminizedStates) {
+          throw new TooComplexToDeterminizeException(a, minNumStates);
+        }
+        a = Operations.repeat(a, min);
         a = MinimizationOperations.minimize(a, maxDeterminizedStates);
         break;
       case REGEXP_REPEAT_MINMAX:
-        a = Operations.repeat(
-          exp1.toAutomatonInternal(automata, automaton_provider,
-            maxDeterminizedStates),
-          min,
-          max);
-        a = MinimizationOperations.minimize(a, maxDeterminizedStates);
+        a = exp1.toAutomatonInternal(automata, automaton_provider, maxDeterminizedStates);
+        int minMaxNumStates = (a.getNumStates() - 1) * max;
+        if (minMaxNumStates > maxDeterminizedStates) {
+          throw new TooComplexToDeterminizeException(a, minMaxNumStates);
+        }
+        a = Operations.repeat(a, min, max);
         break;
       case REGEXP_COMPLEMENT:
         a = Operations.complement(

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7dde7984/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java
index 01517fc..5750e8a 100644
--- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java
+++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java
@@ -52,8 +52,7 @@ public class TestOperations extends LuceneTestCase {
     for (BytesRef bref : strings) {
       eachIndividual[i++] = Automata.makeString(bref.utf8ToString());
     }
-    return Operations.determinize(Operations.union(Arrays.asList(eachIndividual)),
-      DEFAULT_MAX_DETERMINIZED_STATES);
+    return Operations.determinize(Operations.union(Arrays.asList(eachIndividual)), DEFAULT_MAX_DETERMINIZED_STATES);
   }
 
   /** Test concatenation with empty language returns empty */
@@ -61,6 +60,7 @@ public class TestOperations extends LuceneTestCase {
     Automaton a = Automata.makeString("a");
     Automaton concat = Operations.concatenate(a, Automata.makeEmpty());
     assertTrue(Operations.isEmpty(concat));
+
   }
   
   /** Test optimization to concatenate() with empty String to an NFA */
@@ -124,6 +124,28 @@ public class TestOperations extends LuceneTestCase {
     }
   }
 
+  public void testIsFiniteEatsStack() {
+    char[] chars = new char[50000];
+    TestUtil.randomFixedLengthUnicodeString(random(), chars, 0, chars.length);
+    String bigString1 = new String(chars);
+    TestUtil.randomFixedLengthUnicodeString(random(), chars, 0, chars.length);
+    String bigString2 = new String(chars);
+    Automaton a = Operations.union(Automata.makeString(bigString1), Automata.makeString(bigString2));
+    IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () ->
Operations.isFinite(a));
+    assertTrue(exc.getMessage().contains("input automaton is too large"));
+  }
+
+  public void testTopoSortEatsStack() {
+    char[] chars = new char[50000];
+    TestUtil.randomFixedLengthUnicodeString(random(), chars, 0, chars.length);
+    String bigString1 = new String(chars);
+    TestUtil.randomFixedLengthUnicodeString(random(), chars, 0, chars.length);
+    String bigString2 = new String(chars);
+    Automaton a = Operations.union(Automata.makeString(bigString1), Automata.makeString(bigString2));
+    IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () ->
Operations.topoSortStates(a));
+    assertTrue(exc.getMessage().contains("input automaton is too large"));
+  }
+
   /**
    * Returns the set of all accepted strings.
    *

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7dde7984/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java
index b9ac619..7d24939 100644
--- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java
+++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java
@@ -19,13 +19,6 @@ package org.apache.lucene.util.automaton;
 
 import org.apache.lucene.util.LuceneTestCase;
 
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.ObjectInput;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutput;
-import java.io.ObjectOutputStream;
-
 public class TestRegExp extends LuceneTestCase {
 
   /**
@@ -54,6 +47,14 @@ public class TestRegExp extends LuceneTestCase {
     assertTrue(expected.getMessage().contains(source));
   }
 
+  public void testSerializeTooManyStatesToRepeat() throws Exception {
+    String source = "a{50001}";
+    TooComplexToDeterminizeException expected = expectThrows(TooComplexToDeterminizeException.class,
() -> {
+      new RegExp(source).toAutomaton(50000);
+    });
+    assertTrue(expected.getMessage().contains(source));
+  }
+
   // LUCENE-6713
   public void testSerializeTooManyStatesToDeterminizeExc() throws Exception {
     // LUCENE-6046
@@ -62,16 +63,6 @@ public class TestRegExp extends LuceneTestCase {
       new RegExp(source).toAutomaton();
     });
     assertTrue(expected.getMessage().contains(source));
-
-    ByteArrayOutputStream bos = new ByteArrayOutputStream();
-    ObjectOutput out = new ObjectOutputStream(bos);   
-    out.writeObject(expected);
-    byte[] bytes = bos.toByteArray();
-
-    ByteArrayInputStream bis = new ByteArrayInputStream(bytes);
-    ObjectInput in = new ObjectInputStream(bis);
-    TooComplexToDeterminizeException e2 = (TooComplexToDeterminizeException) in.readObject();
-    assertNotNull(e2.getMessage());
   }
 
   // LUCENE-6046

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7dde7984/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
index 590eb86..06d44b9 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
@@ -1252,10 +1252,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
       suggester.build(new InputArrayIterator(new Input[] {
             new Input(bigString, 7)}));
       fail("did not hit expected exception");
-    } catch (StackOverflowError soe) {
-      // OK
     } catch (IllegalArgumentException iae) {
       // expected
+      assertTrue(iae.getMessage().contains("input automaton is too large"));
     }
     IOUtils.close(a, tempDir);
   }


Mime
View raw message