lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r1602049 - in /lucene/dev/branches/lucene5752/lucene: analysis/common/src/test/org/apache/lucene/analysis/core/ core/src/java/org/apache/lucene/search/ core/src/java/org/apache/lucene/util/automaton/ core/src/test/org/apache/lucene/util/aut...
Date Wed, 11 Jun 2014 23:22:11 GMT
Author: mikemccand
Date: Wed Jun 11 23:22:10 2014
New Revision: 1602049

URL: http://svn.apache.org/r1602049
Log:
LUCENE-5752: more cutover

Modified:
    lucene/dev/branches/lucene5752/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java
    lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java
    lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/BasicOperations.java
    lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestBasicOperations.java
    lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java
    lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestSpecialOperations.java
    lucene/dev/branches/lucene5752/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java
    lucene/dev/branches/lucene5752/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java

Modified: lucene/dev/branches/lucene5752/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java?rev=1602049&r1=1602048&r2=1602049&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java
(original)
+++ lucene/dev/branches/lucene5752/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java
Wed Jun 11 23:22:10 2014
@@ -31,11 +31,9 @@ import org.apache.lucene.analysis.tokena
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.BasicOperations;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
-import org.apache.lucene.util.automaton.State;
-import org.apache.lucene.util.automaton.Transition;
+import org.apache.lucene.util.automaton.LightAutomaton;
 
 /**
  * Compares MockTokenizer (which is simple with no optimizations) with equivalent 
@@ -50,18 +48,18 @@ public class TestDuelingAnalyzers extend
   @Override
   public void setUp() throws Exception {
     super.setUp();
+    LightAutomaton single = new LightAutomaton();
+    int initial = single.createState();
+    int accept = single.createState();
+    single.setAccept(accept, true);
+
     // build an automaton matching this jvm's letter definition
-    State initial = new State();
-    State accept = new State();
-    accept.setAccept(true);
     for (int i = 0; i <= 0x10FFFF; i++) {
       if (Character.isLetter(i)) {
-        initial.addTransition(new Transition(i, i, accept));
+        single.addTransition(initial, accept, i);
       }
     }
-    Automaton single = new Automaton(initial);
-    single.reduce();
-    Automaton repeat = BasicOperations.repeat(single);
+    LightAutomaton repeat = BasicOperations.repeatLight(single);
     jvmLetter = new CharacterRunAutomaton(repeat);
   }
   

Modified: lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java?rev=1602049&r1=1602048&r2=1602049&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java
(original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java
Wed Jun 11 23:22:10 2014
@@ -22,7 +22,6 @@ import java.util.List;
 
 import org.apache.lucene.index.Term;
 import org.apache.lucene.util.ToStringUtils;
-import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.BasicAutomata;
 import org.apache.lucene.util.automaton.BasicOperations;
 import org.apache.lucene.util.automaton.LightAutomaton;
@@ -55,7 +54,7 @@ public class WildcardQuery extends Autom
    * Constructs a query for terms matching <code>term</code>. 
    */
   public WildcardQuery(Term term) {
-    super(term, toLightAutomaton(term));
+    super(term, toAutomaton(term));
   }
   
   /**
@@ -63,44 +62,7 @@ public class WildcardQuery extends Autom
    * @lucene.internal
    */
   @SuppressWarnings("fallthrough")
-  public static Automaton toAutomaton(Term wildcardquery) {
-    List<Automaton> automata = new ArrayList<>();
-    
-    String wildcardText = wildcardquery.text();
-    
-    for (int i = 0; i < wildcardText.length();) {
-      final int c = wildcardText.codePointAt(i);
-      int length = Character.charCount(c);
-      switch(c) {
-        case WILDCARD_STRING: 
-          automata.add(BasicAutomata.makeAnyString());
-          break;
-        case WILDCARD_CHAR:
-          automata.add(BasicAutomata.makeAnyChar());
-          break;
-        case WILDCARD_ESCAPE:
-          // add the next codepoint instead, if it exists
-          if (i + length < wildcardText.length()) {
-            final int nextChar = wildcardText.codePointAt(i + length);
-            length += Character.charCount(nextChar);
-            automata.add(BasicAutomata.makeChar(nextChar));
-            break;
-          } // else fallthru, lenient parsing with a trailing \
-        default:
-          automata.add(BasicAutomata.makeChar(c));
-      }
-      i += length;
-    }
-    
-    return BasicOperations.concatenate(automata);
-  }
-
-  /**
-   * Convert Lucene wildcard syntax into an automaton.
-   * @lucene.internal
-   */
-  @SuppressWarnings("fallthrough")
-  public static LightAutomaton toLightAutomaton(Term wildcardquery) {
+  public static LightAutomaton toAutomaton(Term wildcardquery) {
     List<LightAutomaton> automata = new ArrayList<>();
     
     String wildcardText = wildcardquery.text();

Modified: lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/BasicOperations.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/BasicOperations.java?rev=1602049&r1=1602048&r2=1602049&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/BasicOperations.java
(original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/BasicOperations.java
Wed Jun 11 23:22:10 2014
@@ -1060,6 +1060,9 @@ final public class BasicOperations {
     // Add epsilon transition from new initial state
     int stateOffset = 1;
     for(LightAutomaton a : l) {
+      if (a.getNumStates() == 0) {
+        continue;
+      }
       result.addEpsilon(0, stateOffset);
       stateOffset += a.getNumStates();
     }

Modified: lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestBasicOperations.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestBasicOperations.java?rev=1602049&r1=1602048&r2=1602049&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestBasicOperations.java
(original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestBasicOperations.java
Wed Jun 11 23:22:10 2014
@@ -46,46 +46,11 @@ public class TestBasicOperations extends
     return BasicOperations.determinize(BasicOperations.unionLight(Arrays.asList(eachIndividual)));
   }
 
-  /** Test optimization to concatenate() */
-  public void testSingletonConcatenate() {
-    Automaton singleton = BasicAutomata.makeString("prefix");
-    Automaton expandedSingleton = singleton.cloneExpanded();
-    Automaton other = BasicAutomata.makeCharRange('5', '7');
-    Automaton concat = BasicOperations.concatenate(singleton, other);
-    assertTrue(concat.isDeterministic());
-    assertTrue(BasicOperations.sameLanguage(BasicOperations.concatenate(expandedSingleton,
other), concat));
-  }
-  
-  /** Test optimization to concatenate() to an NFA */
-  public void testSingletonNFAConcatenate() {
-    Automaton singleton = BasicAutomata.makeString("prefix");
-    Automaton expandedSingleton = singleton.cloneExpanded();
-    // an NFA (two transitions for 't' from initial state)
-    Automaton nfa = BasicOperations.union(BasicAutomata.makeString("this"),
-        BasicAutomata.makeString("three"));
-    Automaton concat = BasicOperations.concatenate(singleton, nfa);
-    assertFalse(concat.isDeterministic());
-    assertTrue(BasicOperations.sameLanguage(BasicOperations.concatenate(expandedSingleton,
nfa), concat));
-  }
-  
-  /** Test optimization to concatenate() with empty String */
-  public void testEmptySingletonConcatenate() {
-    Automaton singleton = BasicAutomata.makeString("");
-    Automaton expandedSingleton = singleton.cloneExpanded();
-    Automaton other = BasicAutomata.makeCharRange('5', '7');
-    Automaton concat1 = BasicOperations.concatenate(expandedSingleton, other);
-    Automaton concat2 = BasicOperations.concatenate(singleton, other);
-    assertTrue(concat2.isDeterministic());
-    assertTrue(BasicOperations.sameLanguage(concat1, concat2));
-    assertTrue(BasicOperations.sameLanguage(other, concat1));
-    assertTrue(BasicOperations.sameLanguage(other, concat2));
-  }
-  
   /** Test concatenation with empty language returns empty */
   public void testEmptyLanguageConcatenate() {
-    Automaton a = BasicAutomata.makeString("a");
-    Automaton concat = BasicOperations.concatenate(a, BasicAutomata.makeEmpty());
-    assertTrue(BasicOperations.isEmpty(concat));
+    LightAutomaton a = BasicAutomata.makeStringLight("a");
+    LightAutomaton concat = BasicOperations.concatenateLight(a, BasicAutomata.makeEmptyLight());
+    assertTrue(concat.isEmpty());
   }
   
   /** Test optimization to concatenate() with empty String to an NFA */

Modified: lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java?rev=1602049&r1=1602048&r2=1602049&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java
(original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java
Wed Jun 11 23:22:10 2014
@@ -41,7 +41,7 @@ public class TestLevenshteinAutomata ext
   
   // LUCENE-3094
   public void testNoWastedStates() throws Exception {
-    AutomatonTestUtil.assertNoDetachedStates(new LevenshteinAutomata("abc", false).toAutomaton(1));
+    AutomatonTestUtil.assertNoDetachedStates(new LevenshteinAutomata("abc", false).toLightAutomaton(1));
   }
   
   /** 
@@ -66,35 +66,35 @@ public class TestLevenshteinAutomata ext
   private void assertLev(String s, int maxDistance) {
     LevenshteinAutomata builder = new LevenshteinAutomata(s, false);
     LevenshteinAutomata tbuilder = new LevenshteinAutomata(s, true);
-    Automaton automata[] = new Automaton[maxDistance + 1];
-    Automaton tautomata[] = new Automaton[maxDistance + 1];
+    LightAutomaton automata[] = new LightAutomaton[maxDistance + 1];
+    LightAutomaton tautomata[] = new LightAutomaton[maxDistance + 1];
     for (int n = 0; n < automata.length; n++) {
-      automata[n] = builder.toAutomaton(n);
-      tautomata[n] = tbuilder.toAutomaton(n);
+      automata[n] = builder.toLightAutomaton(n);
+      tautomata[n] = tbuilder.toLightAutomaton(n);
       assertNotNull(automata[n]);
       assertNotNull(tautomata[n]);
-      assertTrue(automata[n].isDeterministic());
-      assertTrue(tautomata[n].isDeterministic());
+      assertTrue(BasicOperations.isDeterministic(automata[n]));
+      assertTrue(BasicOperations.isDeterministic(tautomata[n]));
       assertTrue(SpecialOperations.isFinite(automata[n]));
       assertTrue(SpecialOperations.isFinite(tautomata[n]));
       AutomatonTestUtil.assertNoDetachedStates(automata[n]);
       AutomatonTestUtil.assertNoDetachedStates(tautomata[n]);
       // check that the dfa for n-1 accepts a subset of the dfa for n
       if (n > 0) {
-        assertTrue(automata[n-1].subsetOf(automata[n]));
-        assertTrue(automata[n-1].subsetOf(tautomata[n]));
-        assertTrue(tautomata[n-1].subsetOf(automata[n]));
-        assertTrue(tautomata[n-1].subsetOf(tautomata[n]));
+        assertTrue(BasicOperations.subsetOf(automata[n-1], automata[n]));
+        assertTrue(BasicOperations.subsetOf(automata[n-1], tautomata[n]));
+        assertTrue(BasicOperations.subsetOf(tautomata[n-1], automata[n]));
+        assertTrue(BasicOperations.subsetOf(tautomata[n-1], tautomata[n]));
         assertNotSame(automata[n-1], automata[n]);
       }
       // check that Lev(N) is a subset of LevT(N)
-      assertTrue(automata[n].subsetOf(tautomata[n]));
+      assertTrue(BasicOperations.subsetOf(automata[n], tautomata[n]));
       // special checks for specific n
       switch(n) {
         case 0:
           // easy, matches the string itself
-          assertTrue(BasicOperations.sameLanguage(BasicAutomata.makeString(s), automata[0]));
-          assertTrue(BasicOperations.sameLanguage(BasicAutomata.makeString(s), tautomata[0]));
+          assertTrue(BasicOperations.sameLanguage(BasicAutomata.makeStringLight(s), automata[0]));
+          assertTrue(BasicOperations.sameLanguage(BasicAutomata.makeStringLight(s), tautomata[0]));
           break;
         case 1:
           // generate a lev1 naively, and check the accepted lang is the same.
@@ -113,14 +113,14 @@ public class TestLevenshteinAutomata ext
    * Return an automaton that accepts all 1-character insertions, deletions, and
    * substitutions of s.
    */
-  private Automaton naiveLev1(String s) {
-    Automaton a = BasicAutomata.makeString(s);
-    a = BasicOperations.union(a, insertionsOf(s));
-    MinimizationOperations.minimize(a);
-    a = BasicOperations.union(a, deletionsOf(s));
-    MinimizationOperations.minimize(a);
-    a = BasicOperations.union(a, substitutionsOf(s));
-    MinimizationOperations.minimize(a);
+  private LightAutomaton naiveLev1(String s) {
+    LightAutomaton a = BasicAutomata.makeStringLight(s);
+    a = BasicOperations.unionLight(a, insertionsOf(s));
+    a = MinimizationOperationsLight.minimize(a);
+    a = BasicOperations.unionLight(a, deletionsOf(s));
+    a = MinimizationOperationsLight.minimize(a);
+    a = BasicOperations.unionLight(a, substitutionsOf(s));
+    a = MinimizationOperationsLight.minimize(a);
     
     return a;
   }
@@ -129,10 +129,10 @@ public class TestLevenshteinAutomata ext
    * Return an automaton that accepts all 1-character insertions, deletions,
    * substitutions, and transpositions of s.
    */
-  private Automaton naiveLev1T(String s) {
-    Automaton a = naiveLev1(s);
-    a = BasicOperations.union(a, transpositionsOf(s));
-    MinimizationOperations.minimize(a);
+  private LightAutomaton naiveLev1T(String s) {
+    LightAutomaton a = naiveLev1(s);
+    a = BasicOperations.unionLight(a, transpositionsOf(s));
+    a = MinimizationOperationsLight.minimize(a);
     return a;
   }
   
@@ -140,19 +140,18 @@ public class TestLevenshteinAutomata ext
    * Return an automaton that accepts all 1-character insertions of s (inserting
    * one character)
    */
-  private Automaton insertionsOf(String s) {
-    List<Automaton> list = new ArrayList<>();
+  private LightAutomaton insertionsOf(String s) {
+    List<LightAutomaton> list = new ArrayList<>();
     
     for (int i = 0; i <= s.length(); i++) {
-      Automaton a = BasicAutomata.makeString(s.substring(0, i));
-      a = BasicOperations.concatenate(a, BasicAutomata.makeAnyChar());
-      a = BasicOperations.concatenate(a, BasicAutomata.makeString(s
-          .substring(i)));
+      LightAutomaton a = BasicAutomata.makeStringLight(s.substring(0, i));
+      a = BasicOperations.concatenateLight(a, BasicAutomata.makeAnyCharLight());
+      a = BasicOperations.concatenateLight(a, BasicAutomata.makeStringLight(s.substring(i)));
       list.add(a);
     }
     
-    Automaton a = BasicOperations.union(list);
-    MinimizationOperations.minimize(a);
+    LightAutomaton a = BasicOperations.unionLight(list);
+    a = MinimizationOperationsLight.minimize(a);
     return a;
   }
   
@@ -160,19 +159,17 @@ public class TestLevenshteinAutomata ext
    * Return an automaton that accepts all 1-character deletions of s (deleting
    * one character).
    */
-  private Automaton deletionsOf(String s) {
-    List<Automaton> list = new ArrayList<>();
+  private LightAutomaton deletionsOf(String s) {
+    List<LightAutomaton> list = new ArrayList<>();
     
     for (int i = 0; i < s.length(); i++) {
-      Automaton a = BasicAutomata.makeString(s.substring(0, i));
-      a = BasicOperations.concatenate(a, BasicAutomata.makeString(s
-          .substring(i + 1)));
-      a.expandSingleton();
+      LightAutomaton a = BasicAutomata.makeStringLight(s.substring(0, i));
+      a = BasicOperations.concatenateLight(a, BasicAutomata.makeStringLight(s.substring(i
+ 1)));
       list.add(a);
     }
     
-    Automaton a = BasicOperations.union(list);
-    MinimizationOperations.minimize(a);
+    LightAutomaton a = BasicOperations.unionLight(list);
+    a = MinimizationOperationsLight.minimize(a);
     return a;
   }
   
@@ -180,19 +177,18 @@ public class TestLevenshteinAutomata ext
    * Return an automaton that accepts all 1-character substitutions of s
    * (replacing one character)
    */
-  private Automaton substitutionsOf(String s) {
-    List<Automaton> list = new ArrayList<>();
+  private LightAutomaton substitutionsOf(String s) {
+    List<LightAutomaton> list = new ArrayList<>();
     
     for (int i = 0; i < s.length(); i++) {
-      Automaton a = BasicAutomata.makeString(s.substring(0, i));
-      a = BasicOperations.concatenate(a, BasicAutomata.makeAnyChar());
-      a = BasicOperations.concatenate(a, BasicAutomata.makeString(s
-          .substring(i + 1)));
+      LightAutomaton a = BasicAutomata.makeStringLight(s.substring(0, i));
+      a = BasicOperations.concatenateLight(a, BasicAutomata.makeAnyCharLight());
+      a = BasicOperations.concatenateLight(a, BasicAutomata.makeStringLight(s.substring(i
+ 1)));
       list.add(a);
     }
     
-    Automaton a = BasicOperations.union(list);
-    MinimizationOperations.minimize(a);
+    LightAutomaton a = BasicOperations.unionLight(list);
+    a = MinimizationOperationsLight.minimize(a);
     return a;
   }
   
@@ -200,10 +196,11 @@ public class TestLevenshteinAutomata ext
    * Return an automaton that accepts all transpositions of s
    * (transposing two adjacent characters)
    */
-  private Automaton transpositionsOf(String s) {
-    if (s.length() < 2)
-      return BasicAutomata.makeEmpty();
-    List<Automaton> list = new ArrayList<>();
+  private LightAutomaton transpositionsOf(String s) {
+    if (s.length() < 2) {
+      return BasicAutomata.makeEmptyLight();
+    }
+    List<LightAutomaton> list = new ArrayList<>();
     for (int i = 0; i < s.length()-1; i++) {
       StringBuilder sb = new StringBuilder();
       sb.append(s.substring(0, i));
@@ -211,15 +208,16 @@ public class TestLevenshteinAutomata ext
       sb.append(s.charAt(i));
       sb.append(s.substring(i+2, s.length()));
       String st = sb.toString();
-      if (!st.equals(s))
-        list.add(BasicAutomata.makeString(st));
+      if (!st.equals(s)) {
+        list.add(BasicAutomata.makeStringLight(st));
+      }
     }
-    Automaton a = BasicOperations.union(list);
-    MinimizationOperations.minimize(a);
+    LightAutomaton a = BasicOperations.unionLight(list);
+    a = MinimizationOperationsLight.minimize(a);
     return a;
   }
   
-  private void assertBruteForce(String input, Automaton dfa, int distance) {
+  private void assertBruteForce(String input, LightAutomaton dfa, int distance) {
     CharacterRunAutomaton ra = new CharacterRunAutomaton(dfa);
     int maxLen = input.length() + distance + 1;
     int maxNum = (int) Math.pow(2, maxLen);
@@ -234,7 +232,7 @@ public class TestLevenshteinAutomata ext
     }
   }
   
-  private void assertBruteForceT(String input, Automaton dfa, int distance) {
+  private void assertBruteForceT(String input, LightAutomaton dfa, int distance) {
     CharacterRunAutomaton ra = new CharacterRunAutomaton(dfa);
     int maxLen = input.length() + distance + 1;
     int maxNum = (int) Math.pow(2, maxLen);

Modified: lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestSpecialOperations.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestSpecialOperations.java?rev=1602049&r1=1602048&r2=1602049&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestSpecialOperations.java
(original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestSpecialOperations.java
Wed Jun 11 23:22:10 2014
@@ -43,10 +43,10 @@ public class TestSpecialOperations exten
 
   /** Pass false for testRecursive if the expected strings
    *  may be too long */
-  private Set<IntsRef> getFiniteStrings(Automaton a, int limit, boolean testRecursive)
{
+  private Set<IntsRef> getFiniteStrings(LightAutomaton a, int limit, boolean testRecursive)
{
     Set<IntsRef> result = SpecialOperations.getFiniteStrings(a, limit);
     if (testRecursive) {
-      assertEquals(AutomatonTestUtil.getFiniteStringsRecursive(a, limit), result);
+      assertEquals(AutomatonTestUtil.getFiniteStringsRecursiveLight(a, limit), result);
     }
     return result;
   }
@@ -55,8 +55,8 @@ public class TestSpecialOperations exten
    * Basic test for getFiniteStrings
    */
   public void testFiniteStringsBasic() {
-    Automaton a = BasicOperations.union(BasicAutomata.makeString("dog"), BasicAutomata.makeString("duck"));
-    MinimizationOperations.minimize(a);
+    LightAutomaton a = BasicOperations.unionLight(BasicAutomata.makeStringLight("dog"), BasicAutomata.makeStringLight("duck"));
+    a = MinimizationOperationsLight.minimize(a);
     Set<IntsRef> strings = getFiniteStrings(a, -1, true);
     assertEquals(2, strings.size());
     IntsRef dog = new IntsRef();
@@ -73,7 +73,7 @@ public class TestSpecialOperations exten
     String bigString1 = new String(chars);
     TestUtil.randomFixedLengthUnicodeString(random(), chars, 0, chars.length);
     String bigString2 = new String(chars);
-    Automaton a = BasicOperations.union(BasicAutomata.makeString(bigString1), BasicAutomata.makeString(bigString2));
+    LightAutomaton a = BasicOperations.unionLight(BasicAutomata.makeStringLight(bigString1),
BasicAutomata.makeStringLight(bigString2));
     Set<IntsRef> strings = getFiniteStrings(a, -1, false);
     assertEquals(2, strings.size());
     IntsRef scratch = new IntsRef();
@@ -91,10 +91,10 @@ public class TestSpecialOperations exten
     }
 
     Set<IntsRef> strings = new HashSet<IntsRef>();
-    List<Automaton> automata = new ArrayList<Automaton>();
+    List<LightAutomaton> automata = new ArrayList<>();
     for(int i=0;i<numStrings;i++) {
       String s = TestUtil.randomSimpleString(random(), 1, 200);
-      automata.add(BasicAutomata.makeString(s));
+      automata.add(BasicAutomata.makeStringLight(s));
       IntsRef scratch = new IntsRef();
       Util.toUTF32(s.toCharArray(), 0, s.length(), scratch);
       strings.add(scratch);
@@ -107,27 +107,22 @@ public class TestSpecialOperations exten
     // DaciukMihovAutomatonBuilder here
 
     // TODO: what other random things can we do here...
-    Automaton a = BasicOperations.union(automata);
+    LightAutomaton a = BasicOperations.unionLight(automata);
     if (random().nextBoolean()) {
-      Automaton.minimize(a);
+      a = MinimizationOperationsLight.minimize(a);
       if (VERBOSE) {
-        System.out.println("TEST: a.minimize numStates=" + a.getNumberOfStates());
+        System.out.println("TEST: a.minimize numStates=" + a.getNumStates());
       }
     } else if (random().nextBoolean()) {
       if (VERBOSE) {
         System.out.println("TEST: a.determinize");
       }
-      a.determinize();
+      a = BasicOperations.determinize(a);
     } else if (random().nextBoolean()) {
       if (VERBOSE) {
-        System.out.println("TEST: a.reduce");
+        System.out.println("TEST: a.removeDeadTransitions");
       }
-      a.reduce();
-    } else if (random().nextBoolean()) {
-      if (VERBOSE) {
-        System.out.println("TEST: a.getNumberedStates");
-      }
-      a.getNumberedStates();
+      a = BasicOperations.removeDeadTransitions(a);
     }
 
     Set<IntsRef> actual = getFiniteStrings(a, -1, true);

Modified: lucene/dev/branches/lucene5752/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java?rev=1602049&r1=1602048&r2=1602049&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java
(original)
+++ lucene/dev/branches/lucene5752/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java
Wed Jun 11 23:22:10 2014
@@ -46,11 +46,11 @@ import org.apache.lucene.search.spans.Sp
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.UnicodeUtil;
-import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.BasicAutomata;
 import org.apache.lucene.util.automaton.BasicOperations;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.apache.lucene.util.automaton.LevenshteinAutomata;
+import org.apache.lucene.util.automaton.LightAutomaton;
 
 /**
  * Support for highlighting multiterm queries in PostingsHighlighter.
@@ -126,10 +126,10 @@ class MultiTermHighlighting {
         int prefixLength = Math.min(fq.getPrefixLength(), termLength);
         String suffix = UnicodeUtil.newString(termText, prefixLength, termText.length - prefixLength);
         LevenshteinAutomata builder = new LevenshteinAutomata(suffix, fq.getTranspositions());
-        Automaton automaton = builder.toAutomaton(fq.getMaxEdits());
+        LightAutomaton automaton = builder.toLightAutomaton(fq.getMaxEdits());
         if (prefixLength > 0) {
-          Automaton prefix = BasicAutomata.makeString(UnicodeUtil.newString(termText, 0,
prefixLength));
-          automaton = BasicOperations.concatenate(prefix, automaton);
+          LightAutomaton prefix = BasicAutomata.makeStringLight(UnicodeUtil.newString(termText,
0, prefixLength));
+          automaton = BasicOperations.concatenateLight(prefix, automaton);
         }
         list.add(new CharacterRunAutomaton(automaton) {
           @Override
@@ -161,7 +161,7 @@ class MultiTermHighlighting {
         final Comparator<CharsRef> comparator = CharsRef.getUTF16SortedAsUTF8Comparator();
         
         // this is *not* an automaton, but its very simple
-        list.add(new CharacterRunAutomaton(BasicAutomata.makeEmpty()) {
+        list.add(new CharacterRunAutomaton(BasicAutomata.makeEmptyLight()) {
           @Override
           public boolean run(char[] s, int offset, int length) {
             scratch.chars = s;

Modified: lucene/dev/branches/lucene5752/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java?rev=1602049&r1=1602048&r2=1602049&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java
(original)
+++ lucene/dev/branches/lucene5752/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java
Wed Jun 11 23:22:10 2014
@@ -649,6 +649,61 @@ public class AutomatonTestUtil {
   }
 
   /**
+   * Simple, original implementation of getFiniteStrings.
+   *
+   * <p>Returns the set of accepted strings, assuming that at most
+   * <code>limit</code> strings are accepted. If more than <code>limit</code>

+   * strings are accepted, the first limit strings found are returned. If <code>limit</code>&lt;0,
then 
+   * the limit is infinite.
+   *
+   * <p>This implementation is recursive: it uses one stack
+   * frame for each digit in the returned strings (ie, max
+   * is the max length returned string).
+   */
+  public static Set<IntsRef> getFiniteStringsRecursiveLight(LightAutomaton a, int limit)
{
+    HashSet<IntsRef> strings = new HashSet<>();
+    if (!getFiniteStringsLight(a, 0, new HashSet<Integer>(), strings, new IntsRef(),
limit)) {
+      return strings;
+    }
+    return strings;
+  }
+
+  /**
+   * Returns the strings that can be produced from the given state, or
+   * false if more than <code>limit</code> strings are found. 
+   * <code>limit</code>&lt;0 means "infinite".
+   */
+  private static boolean getFiniteStringsLight(LightAutomaton a, int s, HashSet<Integer>
pathstates, 
+      HashSet<IntsRef> strings, IntsRef path, int limit) {
+    pathstates.add(s);
+    LightAutomaton.Transition t = new LightAutomaton.Transition();
+    int count = a.initTransition(s, t);
+    for (int i=0;i<count;i++) {
+      a.getNextTransition(t);
+      if (pathstates.contains(t.dest)) {
+        return false;
+      }
+      for (int n = t.min; n <= t.max; n++) {
+        path.grow(path.length+1);
+        path.ints[path.length] = n;
+        path.length++;
+        if (a.isAccept(t.dest)) {
+          strings.add(IntsRef.deepCopyOf(path));
+          if (limit >= 0 && strings.size() > limit) {
+            return false;
+          }
+        }
+        if (!getFiniteStringsLight(a, t.dest, pathstates, strings, path, limit)) {
+          return false;
+        }
+        path.length--;
+      }
+    }
+    pathstates.remove(s);
+    return true;
+  }
+
+  /**
    * Returns true if the language of this automaton is finite.
    * <p>
    * WARNING: this method is slow, it will blow up if the automaton is large.
@@ -713,4 +768,13 @@ public class AutomatonTestUtil {
     a.clearNumberedStates(); // force recomputation of cached numbered states
     assert numStates == a.getNumberOfStates() : "automaton has " + (numStates - a.getNumberOfStates())
+ " detached states";
   }
+
+  /**
+   * Checks that an automaton has no detached states that are unreachable
+   * from the initial state.
+   */
+  public static void assertNoDetachedStates(LightAutomaton a) {
+    LightAutomaton a2 = BasicOperations.removeDeadTransitions(a);
+    assert a.getNumStates() == a2.getNumStates() : "automaton has " + (a.getNumStates() -
a2.getNumStates()) + " detached states";
+  }
 }



Mime
View raw message