lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sim...@apache.org
Subject svn commit: r1576860 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/util/fst/ lucene/core/src/test/org/apache/lucene/util/fst/ lucene/suggest/ lucene/suggest/src/java/org/apache/lucene/search/suggest...
Date Wed, 12 Mar 2014 19:05:05 GMT
Author: simonw
Date: Wed Mar 12 19:05:05 2014
New Revision: 1576860

URL: http://svn.apache.org/r1576860
Log:
LUCENE-5519: Make queueDepth enforcing optional in TopNSearcher

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
    lucene/dev/branches/branch_4x/lucene/suggest/   (props changed)
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1576860&r1=1576859&r2=1576860&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Wed Mar 12 19:05:05 2014
@@ -68,6 +68,14 @@ API Changes
   also simplified the Weight.scorer API by removing the two confusing
   booleans.  (Robert Muir, Uwe Schindler, Mike McCandless)
 
+* LUCENE-5519: TopNSearcher now allows to retrieve incomplete results if the max
+  size of the candidate queue is unknown. The queue can still be bound in order
+  to apply pruning while retrieving the top N but will not throw an exception if
+  too many results are rejected to guarantee an absolutely correct top N result.
+  The TopNSearcher now returns a struct like class that indicates if the result
+  is complete in the sense of the top N or not. Consumers of this API should assert
+  on the completness if the bounded queue size is know ahead of time. (Simon Willnauer)
+
 Optimizations
 
 * LUCENE-5468: HunspellStemFilter uses 10 to 100x less RAM. It also loads

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Util.java?rev=1576860&r1=1576859&r2=1576860&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
Wed Mar 12 19:05:05 2014
@@ -17,14 +17,20 @@ package org.apache.lucene.util.fst;
  * limitations under the License.
  */
 
-import java.io.*;
-import java.util.*;
-
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.fst.FST.Arc;
 import org.apache.lucene.util.fst.FST.BytesReader;
 
+import java.io.IOException;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+import java.util.TreeSet;
+
 /** Static helper methods.
  *
  * @lucene.experimental */
@@ -294,6 +300,13 @@ public final class Util {
 
     TreeSet<FSTPath<T>> queue = null;
 
+    /**
+     * Creates an unbounded TopNSearcher
+     * @param fst the {@link org.apache.lucene.util.fst.FST} to search on
+     * @param topN the number of top scoring entries to retrieve
+     * @param maxQueueDepth the maximum size of the queue of possible top entries
+     * @param comparator the comparator to select the top N
+     */
     public TopNSearcher(FST<T> fst, int topN, int maxQueueDepth, Comparator<T>
comparator) {
       this.fst = fst;
       this.bytesReader = fst.getBytesReader();
@@ -379,9 +392,9 @@ public final class Util {
       }
     }
 
-    public MinResult<T>[] search() throws IOException {
+    public TopResults<T> search() throws IOException {
 
-      final List<MinResult<T>> results = new ArrayList<>();
+      final List<Result<T>> results = new ArrayList<>();
 
       //System.out.println("search topN=" + topN);
 
@@ -422,7 +435,7 @@ public final class Util {
           //System.out.println("    empty string!  cost=" + path.cost);
           // Empty string!
           path.input.length--;
-          results.add(new MinResult<>(path.input, path.cost));
+          results.add(new Result<>(path.input, path.cost));
           continue;
         }
 
@@ -486,10 +499,9 @@ public final class Util {
             T finalOutput = fst.outputs.add(path.cost, path.arc.output);
             if (acceptResult(path.input, finalOutput)) {
               //System.out.println("    add result: " + path);
-              results.add(new MinResult<>(path.input, finalOutput));
+              results.add(new Result(path.input, finalOutput));
             } else {
               rejectCount++;
-              assert rejectCount + topN <= maxQueueDepth: "maxQueueDepth (" + maxQueueDepth
+ ") is too small for topN (" + topN + "): rejected " + rejectCount + " paths";
             }
             break;
           } else {
@@ -500,10 +512,7 @@ public final class Util {
           }
         }
       }
-    
-      @SuppressWarnings({"rawtypes","unchecked"}) final MinResult<T>[] arr =
-        (MinResult<T>[]) new MinResult[results.size()];
-      return results.toArray(arr);
+      return new TopResults(rejectCount + topN <= maxQueueDepth, results);
     }
 
     protected boolean acceptResult(IntsRef input, T output) {
@@ -513,18 +522,47 @@ public final class Util {
 
   /** Holds a single input (IntsRef) + output, returned by
    *  {@link #shortestPaths shortestPaths()}. */
-  public final static class MinResult<T> {
+  public final static class Result<T> {
     public final IntsRef input;
     public final T output;
-    public MinResult(IntsRef input, T output) {
+    public Result(IntsRef input, T output) {
       this.input = input;
       this.output = output;
     }
   }
 
+
+  /**
+   * Holds the results for a top N search using {@link TopNSearcher}
+   */
+  public static final class TopResults<T> implements Iterable<Result<T>>
{
+
+    /**
+     * <code>true</code> iff this is a complete result ie. if
+     * the specified queue size was large enough to find the complete list of results. This
might
+     * be <code>false</code> if the {@link TopNSearcher} rejected too many results.
+     */
+    public final boolean isComplete;
+    /**
+     * The top results
+     */
+    public final List<Result<T>> topN;
+
+    TopResults(boolean isComplete, List<Result<T>> topN) {
+      this.topN = topN;
+      this.isComplete = isComplete;
+    }
+
+    @Override
+    public Iterator<Result<T>> iterator() {
+      return topN.iterator();
+    }
+  }
+
+
   /** Starting from node, find the top N min cost 
    *  completions to a final node. */
-  public static <T> MinResult<T>[] shortestPaths(FST<T> fst, FST.Arc<T>
fromNode, T startOutput, Comparator<T> comparator, int topN,
+  public static <T> TopResults<T> shortestPaths(FST<T> fst, FST.Arc<T>
fromNode, T startOutput, Comparator<T> comparator, int topN,
                                                  boolean allowEmptyString) throws IOException
{
 
     // All paths are kept, so we can pass topN for

Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java?rev=1576860&r1=1576859&r2=1576860&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
Wed Mar 12 19:05:05 2014
@@ -17,17 +17,6 @@ package org.apache.lucene.util.fst;
  * limitations under the License.
  */
 
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.OutputStreamWriter;
-import java.io.StringWriter;
-import java.io.Writer;
-import java.util.*;
-
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -51,9 +40,9 @@ import org.apache.lucene.store.MockDirec
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.LineFileDocs;
+import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.LuceneTestCase.Slow;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
-import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
@@ -62,8 +51,32 @@ import org.apache.lucene.util.fst.BytesR
 import org.apache.lucene.util.fst.FST.Arc;
 import org.apache.lucene.util.fst.FST.BytesReader;
 import org.apache.lucene.util.fst.PairOutputs.Pair;
+import org.apache.lucene.util.fst.Util.Result;
 import org.apache.lucene.util.packed.PackedInts;
 
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.concurrent.atomic.AtomicInteger;
+
 import static org.apache.lucene.util.fst.FSTTester.getRandomString;
 import static org.apache.lucene.util.fst.FSTTester.simpleRandomString;
 import static org.apache.lucene.util.fst.FSTTester.toIntsRef;
@@ -478,7 +491,7 @@ public class TestFSTs extends LuceneTest
           ord++;
           if (ord % 500000 == 0) {
             System.out.println(
-                String.format(Locale.ROOT, 
+                String.format(Locale.ROOT,
                     "%6.2fs: %9d...", ((System.currentTimeMillis() - tStart) / 1000.0), ord));
           }
           if (ord >= limit) {
@@ -645,7 +658,7 @@ public class TestFSTs extends LuceneTest
       }
       idx++;
     }
-    
+
     if (wordsFileIn == null) {
       System.err.println("No input file.");
       System.exit(-1);
@@ -712,7 +725,7 @@ public class TestFSTs extends LuceneTest
     assertNull(fstEnum.seekFloor(new BytesRef("foo")));
     assertNull(fstEnum.seekCeil(new BytesRef("foobaz")));
   }
-  
+
 
   public void testDuplicateFSAString() throws Exception {
     String str = "foobar";
@@ -723,15 +736,15 @@ public class TestFSTs extends LuceneTest
       b.add(Util.toIntsRef(new BytesRef(str), ints), outputs.getNoOutput());
     }
     FST<Object> fst = b.finish();
-    
+
     // count the input paths
-    int count = 0; 
+    int count = 0;
     final BytesRefFSTEnum<Object> fstEnum = new BytesRefFSTEnum<>(fst);
     while(fstEnum.next()!=null) {
-      count++;  
+      count++;
     }
     assertEquals(1, count);
-    
+
     assertNotNull(Util.get(fst, new BytesRef(str)));
     assertNull(Util.get(fst, new BytesRef("foobaz")));
   }
@@ -840,7 +853,7 @@ public class TestFSTs extends LuceneTest
       Document doc = new Document();
       Field idField = newStringField("id", "", Field.Store.NO);
       doc.add(idField);
-      
+
       final int NUM_IDS = atLeast(200);
       //final int NUM_IDS = (int) (377 * (1.0+random.nextDouble()));
       if (VERBOSE) {
@@ -970,7 +983,7 @@ public class TestFSTs extends LuceneTest
     Document doc = new Document();
     Field f = newStringField("field", "", Field.Store.NO);
     doc.add(f);
-      
+
     final int NUM_TERMS = (int) (1000*RANDOM_MULTIPLIER * (1+random().nextDouble()));
     if (VERBOSE) {
       System.out.println("TEST: NUM_TERMS=" + NUM_TERMS);
@@ -1016,8 +1029,8 @@ public class TestFSTs extends LuceneTest
   /**
    * Test state expansion (array format) on close-to-root states. Creates
    * synthetic input that has one expanded state on each level.
-   * 
-   * @see "https://issues.apache.org/jira/browse/LUCENE-2933" 
+   *
+   * @see "https://issues.apache.org/jira/browse/LUCENE-2933"
    */
   public void testExpandedCloseToRoot() throws Exception {
     class SyntheticData {
@@ -1037,10 +1050,10 @@ public class TestFSTs extends LuceneTest
           term.copyChars(w);
           b.add(Util.toIntsRef(term, scratchIntsRef), nothing);
         }
-        
+
         return b.finish();
       }
-      
+
       void generate(ArrayList<String> out, StringBuilder b, char from, char to,
           int depth) {
         if (depth == 0 || from == to) {
@@ -1055,12 +1068,12 @@ public class TestFSTs extends LuceneTest
         }
       }
 
-      public int verifyStateAndBelow(FST<Object> fst, Arc<Object> arc, int depth)

+      public int verifyStateAndBelow(FST<Object> fst, Arc<Object> arc, int depth)
         throws IOException {
         if (FST.targetHasArcs(arc)) {
           int childCount = 0;
           BytesReader fstReader = fst.getBytesReader();
-          for (arc = fst.readFirstTargetArc(arc, arc, fstReader);; 
+          for (arc = fst.readFirstTargetArc(arc, arc, fstReader);;
                arc = fst.readNextArc(arc, fstReader), childCount++)
           {
             boolean expanded = fst.isExpandedTarget(arc, fstReader);
@@ -1068,7 +1081,7 @@ public class TestFSTs extends LuceneTest
 
             assertEquals(
                 expanded,
-                (depth <= FST.FIXED_ARRAY_SHALLOW_DISTANCE && 
+                (depth <= FST.FIXED_ARRAY_SHALLOW_DISTANCE &&
                     children >= FST.FIXED_ARRAY_NUM_ARCS_SHALLOW) ||
                  children >= FST.FIXED_ARRAY_NUM_ARCS_DEEP);
             if (arc.isLast()) break;
@@ -1123,7 +1136,7 @@ public class TestFSTs extends LuceneTest
     Util.toDot(fst, w, false, false);
     w.close();
     //System.out.println(w.toString());
-    
+
     // check for accept state at label t
     assertTrue(w.toString().indexOf("[label=\"t\" style=\"bold\"") != -1);
     // check for accept state at label n
@@ -1171,7 +1184,7 @@ public class TestFSTs extends LuceneTest
     //Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp3/out.dot"));
     Util.toDot(fst, w, false, false);
     w.close();
-    
+
     checkStopNodes(fst, outputs);
 
     // Make sure it still works after save/load:
@@ -1204,12 +1217,12 @@ public class TestFSTs extends LuceneTest
     assertFalse(arc.isFinal());
     assertEquals(42, arc.output.longValue());
   }
-  
+
   static final Comparator<Long> minLongComparator = new Comparator<Long> () {
     @Override
     public int compare(Long left, Long right) {
       return left.compareTo(right);
-    }  
+    }
   };
 
   public void testShortestPaths() throws Exception {
@@ -1225,32 +1238,82 @@ public class TestFSTs extends LuceneTest
     //Util.toDot(fst, w, false, false);
     //w.close();
 
-    Util.MinResult<Long>[] r = Util.shortestPaths(fst,
+    Util.TopResults<Long> res = Util.shortestPaths(fst,
                                                   fst.getFirstArc(new FST.Arc<Long>()),
                                                   outputs.getNoOutput(),
                                                   minLongComparator,
                                                   3,
                                                   true);
-    assertEquals(3, r.length);
+    assertTrue(res.isComplete);
+    assertEquals(3, res.topN.size());
+    assertEquals(Util.toIntsRef(new BytesRef("aac"), scratch), res.topN.get(0).input);
+    assertEquals(7L, res.topN.get(0).output.longValue());
+
+    assertEquals(Util.toIntsRef(new BytesRef("ax"), scratch), res.topN.get(1).input);
+    assertEquals(17L,res.topN.get(1).output.longValue());
 
-    assertEquals(Util.toIntsRef(new BytesRef("aac"), scratch), r[0].input);
-    assertEquals(7L, r[0].output.longValue());
+    assertEquals(Util.toIntsRef(new BytesRef("aab"), scratch), res.topN.get(2).input);
+    assertEquals(22L, res.topN.get(2).output.longValue());
+  }
+
+  public void testRejectNoLimits() throws IOException {
+    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
+    final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
 
-    assertEquals(Util.toIntsRef(new BytesRef("ax"), scratch), r[1].input);
-    assertEquals(17L, r[1].output.longValue());
+    final IntsRef scratch = new IntsRef();
+    builder.add(Util.toIntsRef(new BytesRef("aab"), scratch), 22L);
+    builder.add(Util.toIntsRef(new BytesRef("aac"), scratch), 7L);
+    builder.add(Util.toIntsRef(new BytesRef("adcd"), scratch), 17L);
+    builder.add(Util.toIntsRef(new BytesRef("adcde"), scratch), 17L);
 
-    assertEquals(Util.toIntsRef(new BytesRef("aab"), scratch), r[2].input);
-    assertEquals(22L, r[2].output.longValue());
+    builder.add(Util.toIntsRef(new BytesRef("ax"), scratch), 17L);
+    final FST<Long> fst = builder.finish();
+    final AtomicInteger rejectCount = new AtomicInteger();
+    Util.TopNSearcher<Long> searcher = new Util.TopNSearcher<Long>(fst, 2, 6,
minLongComparator) {
+      @Override
+      protected boolean acceptResult(IntsRef input, Long output) {
+        boolean accept = output.intValue() == 7;
+        if (!accept) {
+          rejectCount.incrementAndGet();
+        }
+        return accept;
+      }
+    };
+
+    searcher.addStartPaths(fst.getFirstArc(new FST.Arc<Long>()),  outputs.getNoOutput(),
true, new IntsRef());
+    Util.TopResults<Long> res = searcher.search();
+    assertEquals(rejectCount.get(), 4);
+    assertTrue(res.isComplete); // rejected(4) + topN(2) <= maxQueueSize(6)
+
+    assertEquals(1, res.topN.size());
+    assertEquals(Util.toIntsRef(new BytesRef("aac"), scratch), res.topN.get(0).input);
+    assertEquals(7L, res.topN.get(0).output.longValue());
+    rejectCount.set(0);
+    searcher = new Util.TopNSearcher<Long>(fst, 2, 5, minLongComparator) {
+      @Override
+      protected boolean acceptResult(IntsRef input, Long output) {
+        boolean accept = output.intValue() == 7;
+        if (!accept) {
+          rejectCount.incrementAndGet();
+        }
+        return accept;
+      }
+    };
+
+    searcher.addStartPaths(fst.getFirstArc(new FST.Arc<Long>()),  outputs.getNoOutput(),
true, new IntsRef());
+    res = searcher.search();
+    assertEquals(rejectCount.get(), 4);
+    assertFalse(res.isComplete); // rejected(4) + topN(2) > maxQueueSize(5)
   }
-  
+
   // compares just the weight side of the pair
   static final Comparator<Pair<Long,Long>> minPairWeightComparator = new Comparator<Pair<Long,Long>>
() {
     @Override
     public int compare(Pair<Long,Long> left, Pair<Long,Long> right) {
       return left.output1.compareTo(right.output1);
-    }  
+    }
   };
-  
+
   /** like testShortestPaths, but uses pairoutputs so we have both a weight and an output
*/
   public void testShortestPathsWFST() throws Exception {
 
@@ -1258,7 +1321,7 @@ public class TestFSTs extends LuceneTest
         PositiveIntOutputs.getSingleton(), // weight
         PositiveIntOutputs.getSingleton()  // output
     );
-    
+
     final Builder<Pair<Long,Long>> builder = new Builder<>(FST.INPUT_TYPE.BYTE1,
outputs);
 
     final IntsRef scratch = new IntsRef();
@@ -1270,38 +1333,39 @@ public class TestFSTs extends LuceneTest
     //Util.toDot(fst, w, false, false);
     //w.close();
 
-    Util.MinResult<Pair<Long,Long>>[] r = Util.shortestPaths(fst,
+    Util.TopResults<Pair<Long,Long>> res = Util.shortestPaths(fst,
                                                              fst.getFirstArc(new FST.Arc<Pair<Long,Long>>()),
                                                              outputs.getNoOutput(),
                                                              minPairWeightComparator,
                                                              3,
                                                              true);
-    assertEquals(3, r.length);
+    assertTrue(res.isComplete);
+    assertEquals(3, res.topN.size());
 
-    assertEquals(Util.toIntsRef(new BytesRef("aac"), scratch), r[0].input);
-    assertEquals(7L, r[0].output.output1.longValue()); // weight
-    assertEquals(36L, r[0].output.output2.longValue()); // output
-
-    assertEquals(Util.toIntsRef(new BytesRef("ax"), scratch), r[1].input);
-    assertEquals(17L, r[1].output.output1.longValue()); // weight
-    assertEquals(85L, r[1].output.output2.longValue()); // output
-
-    assertEquals(Util.toIntsRef(new BytesRef("aab"), scratch), r[2].input);
-    assertEquals(22L, r[2].output.output1.longValue()); // weight
-    assertEquals(57L, r[2].output.output2.longValue()); // output
+    assertEquals(Util.toIntsRef(new BytesRef("aac"), scratch), res.topN.get(0).input);
+    assertEquals(7L, res.topN.get(0).output.output1.longValue()); // weight
+    assertEquals(36L, res.topN.get(0).output.output2.longValue()); // output
+
+    assertEquals(Util.toIntsRef(new BytesRef("ax"), scratch), res.topN.get(1).input);
+    assertEquals(17L, res.topN.get(1).output.output1.longValue()); // weight
+    assertEquals(85L, res.topN.get(1).output.output2.longValue()); // output
+
+    assertEquals(Util.toIntsRef(new BytesRef("aab"), scratch), res.topN.get(2).input);
+    assertEquals(22L, res.topN.get(2).output.output1.longValue()); // weight
+    assertEquals(57L, res.topN.get(2).output.output2.longValue()); // output
   }
-  
+
   public void testShortestPathsRandom() throws Exception {
     final Random random = random();
     int numWords = atLeast(1000);
-    
+
     final TreeMap<String,Long> slowCompletor = new TreeMap<>();
     final TreeSet<String> allPrefixes = new TreeSet<>();
-    
+
     final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
     final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
     final IntsRef scratch = new IntsRef();
-    
+
     for (int i = 0; i < numWords; i++) {
       String s;
       while (true) {
@@ -1310,32 +1374,32 @@ public class TestFSTs extends LuceneTest
           break;
         }
       }
-      
+
       for (int j = 1; j < s.length(); j++) {
         allPrefixes.add(s.substring(0, j));
       }
       int weight = TestUtil.nextInt(random, 1, 100); // weights 1..100
       slowCompletor.put(s, (long)weight);
     }
-    
+
     for (Map.Entry<String,Long> e : slowCompletor.entrySet()) {
       //System.out.println("add: " + e);
       builder.add(Util.toIntsRef(new BytesRef(e.getKey()), scratch), e.getValue());
     }
-    
+
     final FST<Long> fst = builder.finish();
     //System.out.println("SAVE out.dot");
     //Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"));
     //Util.toDot(fst, w, false, false);
     //w.close();
-    
+
     BytesReader reader = fst.getBytesReader();
-    
+
     //System.out.println("testing: " + allPrefixes.size() + " prefixes");
     for (String prefix : allPrefixes) {
       // 1. run prefix against fst, then complete by value
       //System.out.println("TEST: " + prefix);
-    
+
       long prefixOutput = 0;
       FST.Arc<Long> arc = fst.getFirstArc(new FST.Arc<Long>());
       for(int idx=0;idx<prefix.length();idx++) {
@@ -1347,16 +1411,17 @@ public class TestFSTs extends LuceneTest
 
       final int topN = TestUtil.nextInt(random, 1, 10);
 
-      Util.MinResult<Long>[] r = Util.shortestPaths(fst, arc, fst.outputs.getNoOutput(),
minLongComparator, topN, true);
+      Util.TopResults<Long> r = Util.shortestPaths(fst, arc, fst.outputs.getNoOutput(),
minLongComparator, topN, true);
+      assertTrue(r.isComplete);
 
       // 2. go thru whole treemap (slowCompletor) and check its actually the best suggestion
-      final List<Util.MinResult<Long>> matches = new ArrayList<>();
+      final List<Result<Long>> matches = new ArrayList<>();
 
       // TODO: could be faster... but its slowCompletor for a reason
       for (Map.Entry<String,Long> e : slowCompletor.entrySet()) {
         if (e.getKey().startsWith(prefix)) {
           //System.out.println("  consider " + e.getKey());
-          matches.add(new Util.MinResult<>(Util.toIntsRef(new BytesRef(e.getKey().substring(prefix.length())),
new IntsRef()),
+          matches.add(new Result<>(Util.toIntsRef(new BytesRef(e.getKey().substring(prefix.length())),
new IntsRef()),
                                          e.getValue() - prefixOutput));
         }
       }
@@ -1367,24 +1432,24 @@ public class TestFSTs extends LuceneTest
         matches.subList(topN, matches.size()).clear();
       }
 
-      assertEquals(matches.size(), r.length);
+      assertEquals(matches.size(), r.topN.size());
 
-      for(int hit=0;hit<r.length;hit++) {
+      for(int hit=0;hit<r.topN.size();hit++) {
         //System.out.println("  check hit " + hit);
-        assertEquals(matches.get(hit).input, r[hit].input);
-        assertEquals(matches.get(hit).output, r[hit].output);
+        assertEquals(matches.get(hit).input, r.topN.get(hit).input);
+        assertEquals(matches.get(hit).output, r.topN.get(hit).output);
       }
     }
   }
 
-  private static class TieBreakByInputComparator<T> implements Comparator<Util.MinResult<T>>
{
+  private static class TieBreakByInputComparator<T> implements Comparator<Result<T>>
{
     private final Comparator<T> comparator;
     public TieBreakByInputComparator(Comparator<T> comparator) {
       this.comparator = comparator;
     }
 
     @Override
-    public int compare(Util.MinResult<T> a, Util.MinResult<T> b) {
+    public int compare(Result<T> a, Result<T> b) {
       int cmp = comparator.compare(a.output, b.output);
       if (cmp == 0) {
         return a.input.compareTo(b.input);
@@ -1404,21 +1469,21 @@ public class TestFSTs extends LuceneTest
       this.b = b;
     }
   }
-  
+
   /** like testShortestPathsRandom, but uses pairoutputs so we have both a weight and an
output */
   public void testShortestPathsWFSTRandom() throws Exception {
     int numWords = atLeast(1000);
-    
+
     final TreeMap<String,TwoLongs> slowCompletor = new TreeMap<>();
     final TreeSet<String> allPrefixes = new TreeSet<>();
-    
+
     PairOutputs<Long,Long> outputs = new PairOutputs<>(
         PositiveIntOutputs.getSingleton(), // weight
         PositiveIntOutputs.getSingleton()  // output
     );
     final Builder<Pair<Long,Long>> builder = new Builder<>(FST.INPUT_TYPE.BYTE1,
outputs);
     final IntsRef scratch = new IntsRef();
-    
+
     Random random = random();
     for (int i = 0; i < numWords; i++) {
       String s;
@@ -1428,7 +1493,7 @@ public class TestFSTs extends LuceneTest
           break;
         }
       }
-      
+
       for (int j = 1; j < s.length(); j++) {
         allPrefixes.add(s.substring(0, j));
       }
@@ -1436,27 +1501,27 @@ public class TestFSTs extends LuceneTest
       int output = TestUtil.nextInt(random, 0, 500); // outputs 0..500
       slowCompletor.put(s, new TwoLongs(weight, output));
     }
-    
+
     for (Map.Entry<String,TwoLongs> e : slowCompletor.entrySet()) {
       //System.out.println("add: " + e);
       long weight = e.getValue().a;
       long output = e.getValue().b;
       builder.add(Util.toIntsRef(new BytesRef(e.getKey()), scratch), outputs.newPair(weight,
output));
     }
-    
+
     final FST<Pair<Long,Long>> fst = builder.finish();
     //System.out.println("SAVE out.dot");
     //Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"));
     //Util.toDot(fst, w, false, false);
     //w.close();
-    
+
     BytesReader reader = fst.getBytesReader();
-    
+
     //System.out.println("testing: " + allPrefixes.size() + " prefixes");
     for (String prefix : allPrefixes) {
       // 1. run prefix against fst, then complete by value
       //System.out.println("TEST: " + prefix);
-    
+
       Pair<Long,Long> prefixOutput = outputs.getNoOutput();
       FST.Arc<Pair<Long,Long>> arc = fst.getFirstArc(new FST.Arc<Pair<Long,Long>>());
       for(int idx=0;idx<prefix.length();idx++) {
@@ -1468,17 +1533,17 @@ public class TestFSTs extends LuceneTest
 
       final int topN = TestUtil.nextInt(random, 1, 10);
 
-      Util.MinResult<Pair<Long,Long>>[] r = Util.shortestPaths(fst, arc, fst.outputs.getNoOutput(),
minPairWeightComparator, topN, true);
-
+      Util.TopResults<Pair<Long,Long>> r = Util.shortestPaths(fst, arc, fst.outputs.getNoOutput(),
minPairWeightComparator, topN, true);
+      assertTrue(r.isComplete);
       // 2. go thru whole treemap (slowCompletor) and check its actually the best suggestion
-      final List<Util.MinResult<Pair<Long,Long>>> matches = new ArrayList<>();
+      final List<Result<Pair<Long,Long>>> matches = new ArrayList<>();
 
       // TODO: could be faster... but its slowCompletor for a reason
       for (Map.Entry<String,TwoLongs> e : slowCompletor.entrySet()) {
         if (e.getKey().startsWith(prefix)) {
           //System.out.println("  consider " + e.getKey());
-          matches.add(new Util.MinResult<>(Util.toIntsRef(new BytesRef(e.getKey().substring(prefix.length())),
new IntsRef()),
-                                                          outputs.newPair(e.getValue().a
- prefixOutput.output1, e.getValue().b - prefixOutput.output2)));
+          matches.add(new Result<>(Util.toIntsRef(new BytesRef(e.getKey().substring(prefix.length())),
new IntsRef()),
+                                                  outputs.newPair(e.getValue().a - prefixOutput.output1,
e.getValue().b - prefixOutput.output2)));
         }
       }
 
@@ -1488,12 +1553,12 @@ public class TestFSTs extends LuceneTest
         matches.subList(topN, matches.size()).clear();
       }
 
-      assertEquals(matches.size(), r.length);
+      assertEquals(matches.size(), r.topN.size());
 
-      for(int hit=0;hit<r.length;hit++) {
+      for(int hit=0;hit<r.topN.size();hit++) {
         //System.out.println("  check hit " + hit);
-        assertEquals(matches.get(hit).input, r[hit].input);
-        assertEquals(matches.get(hit).output, r[hit].output);
+        assertEquals(matches.get(hit).input, r.topN.get(hit).input);
+        assertEquals(matches.get(hit).output, r.topN.get(hit).output);
       }
     }
   }
@@ -1525,4 +1590,5 @@ public class TestFSTs extends LuceneTest
       }
     }
   }
+
 }

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java?rev=1576860&r1=1576859&r2=1576860&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
Wed Mar 12 19:05:05 2014
@@ -17,15 +17,6 @@ package org.apache.lucene.search.suggest
  * limitations under the License.
  */
 
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.TokenStreamToAutomaton;
@@ -40,6 +31,7 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.OfflineSorter;
 import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.BasicOperations;
@@ -48,14 +40,23 @@ import org.apache.lucene.util.automaton.
 import org.apache.lucene.util.automaton.Transition;
 import org.apache.lucene.util.fst.Builder;
 import org.apache.lucene.util.fst.ByteSequenceOutputs;
-import org.apache.lucene.util.fst.FST.BytesReader;
 import org.apache.lucene.util.fst.FST;
-import org.apache.lucene.util.fst.PairOutputs.Pair;
+import org.apache.lucene.util.fst.FST.BytesReader;
 import org.apache.lucene.util.fst.PairOutputs;
+import org.apache.lucene.util.fst.PairOutputs.Pair;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
-import org.apache.lucene.util.fst.Util.MinResult;
 import org.apache.lucene.util.fst.Util;
-import org.apache.lucene.util.OfflineSorter;
+import org.apache.lucene.util.fst.Util.Result;
+import org.apache.lucene.util.fst.Util.TopResults;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
 
 /**
  * Suggester that first analyzes the surface form, adds the
@@ -709,7 +710,8 @@ public class AnalyzingSuggester extends 
           }
         }
 
-        MinResult<Pair<Long,BytesRef>> completions[] = searcher.search();
+        TopResults<Pair<Long,BytesRef>> completions = searcher.search();
+        assert completions.isComplete;
 
         // NOTE: this is rather inefficient: we enumerate
         // every matching "exactly the same analyzed form"
@@ -723,7 +725,7 @@ public class AnalyzingSuggester extends 
         // seach: it's bounded by how many prefix start
         // nodes we have and the
         // maxSurfaceFormsPerAnalyzedForm:
-        for(MinResult<Pair<Long,BytesRef>> completion : completions) {
+        for(Result<Pair<Long,BytesRef>> completion : completions) {
           BytesRef output2 = completion.output.output2;
           if (sameSurfaceForm(utf8Key, output2)) {
             results.add(getLookupResult(completion.output.output1, output2, spare));
@@ -778,9 +780,10 @@ public class AnalyzingSuggester extends 
         searcher.addStartPaths(path.fstNode, path.output, true, path.input);
       }
 
-      MinResult<Pair<Long,BytesRef>> completions[] = searcher.search();
+      TopResults<Pair<Long,BytesRef>> completions = searcher.search();
+      assert completions.isComplete;
 
-      for(MinResult<Pair<Long,BytesRef>> completion : completions) {
+      for(Result<Pair<Long,BytesRef>> completion : completions) {
 
         LookupResult result = getLookupResult(completion.output.output1, completion.output.output2,
spare);
 

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java?rev=1576860&r1=1576859&r2=1576860&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java
Wed Mar 12 19:05:05 2014
@@ -21,17 +21,6 @@ package org.apache.lucene.search.suggest
 //   - test w/ syns
 //   - add pruning of low-freq ngrams?
 
-import java.io.File;
-import java.io.IOException;
-//import java.io.PrintWriter;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Random;
-import java.util.Set;
-
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.AnalyzerWrapper;
 import org.apache.lucene.analysis.TokenStream;
@@ -64,17 +53,30 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.OfflineSorter;
 import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.Version;
 import org.apache.lucene.util.fst.Builder;
+import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.FST.Arc;
 import org.apache.lucene.util.fst.FST.BytesReader;
-import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.Outputs;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
-import org.apache.lucene.util.fst.Util.MinResult;
 import org.apache.lucene.util.fst.Util;
-import org.apache.lucene.util.OfflineSorter;
+import org.apache.lucene.util.fst.Util.Result;
+import org.apache.lucene.util.fst.Util.TopResults;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+
+//import java.io.PrintWriter;
 
 /**
  * Builds an ngram model from the text sent to {@link
@@ -612,7 +614,7 @@ public class FreeTextSuggester extends L
         CharsRef spare = new CharsRef();
         
         // complete top-N
-        MinResult<Long> completions[] = null;
+        TopResults<Long> completions = null;
         try {
           
           // Because we store multiple models in one FST
@@ -659,6 +661,7 @@ public class FreeTextSuggester extends L
           searcher.addStartPaths(arc, prefixOutput, true, new IntsRef());
           
           completions = searcher.search();
+          assert completions.isComplete;
         } catch (IOException bogus) {
           throw new RuntimeException(bogus);
         }
@@ -669,7 +672,7 @@ public class FreeTextSuggester extends L
         //System.out.println("    " + completions.length + " completions");
         
         nextCompletion:
-          for (MinResult<Long> completion : completions) {
+          for (Result<Long> completion : completions) {
             token.length = prefixLength;
             // append suffix
             Util.toBytesRef(completion.input, suffix);

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java?rev=1576860&r1=1576859&r2=1576860&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
Wed Mar 12 19:05:05 2014
@@ -17,12 +17,6 @@ package org.apache.lucene.search.suggest
  * limitations under the License.
  */
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.List;
-
 import org.apache.lucene.search.suggest.InputIterator;
 import org.apache.lucene.search.suggest.Lookup;
 import org.apache.lucene.search.suggest.SortedInputIterator;
@@ -34,15 +28,22 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
 import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.fst.Builder;
+import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.FST.Arc;
 import org.apache.lucene.util.fst.FST.BytesReader;
-import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
-import org.apache.lucene.util.fst.Util.MinResult;
+import org.apache.lucene.util.fst.Util.Result;
+import org.apache.lucene.util.fst.Util.TopResults;
 import org.apache.lucene.util.fst.Util;
-import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
 
 /**
  * Suggester based on a weighted FST: it first traverses the prefix, 
@@ -176,15 +177,16 @@ public class WFSTCompletionLookup extend
     }
 
     // complete top-N
-    MinResult<Long> completions[] = null;
+    TopResults<Long> completions = null;
     try {
       completions = Util.shortestPaths(fst, arc, prefixOutput, weightComparator, num, !exactFirst);
+      assert completions.isComplete;
     } catch (IOException bogus) {
       throw new RuntimeException(bogus);
     }
     
     BytesRef suffix = new BytesRef(8);
-    for (MinResult<Long> completion : completions) {
+    for (Result<Long> completion : completions) {
       scratch.length = prefixLength;
       // append suffix
       Util.toBytesRef(completion.input, suffix);



Mime
View raw message