lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r1058390 [9/16] - in /lucene/dev/branches/bulkpostings: ./ dev-tools/ dev-tools/eclipse/ dev-tools/idea/ dev-tools/idea/.idea/ dev-tools/idea/.idea/libraries/ dev-tools/idea/lucene/ dev-tools/idea/lucene/contrib/ dev-tools/idea/lucene/contr...
Date Thu, 13 Jan 2011 02:09:56 GMT
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestWildcardRandom.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestWildcardRandom.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestWildcardRandom.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestWildcardRandom.java Thu Jan 13 02:09:33 2011
@@ -38,7 +38,7 @@ import org.apache.lucene.util._TestUtil;
  * and validates the correct number of hits are returned.
  */
 public class TestWildcardRandom extends LuceneTestCase {
-  private Searcher searcher;
+  private IndexSearcher searcher;
   private IndexReader reader;
   private Directory dir;
   

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/function/TestCustomScoreQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/function/TestCustomScoreQuery.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/function/TestCustomScoreQuery.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/function/TestCustomScoreQuery.java Thu Jan 13 02:09:33 2011
@@ -329,7 +329,7 @@ public class TestCustomScoreQuery extend
     }
   }
 
-  private void logResult(String msg, Searcher s, Query q, int doc, float score1) throws IOException {
+  private void logResult(String msg, IndexSearcher s, Query q, int doc, float score1) throws IOException {
     log(msg+" "+score1);
     log("Explain by: "+q);
     log(s.explain(q,doc));

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java Thu Jan 13 02:09:33 2011
@@ -26,16 +26,16 @@ import org.apache.lucene.analysis.TokenS
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Payload;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.search.Explanation;
-import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.QueryUtils;
 import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanNearQuery;
@@ -306,8 +306,8 @@ public class TestPayloadNearQuery extend
     //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     //Make everything else 1 so we see the effect of the payload
     //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-    @Override public float lengthNorm(String fieldName, int numTerms) {
-      return 1.0f;
+    @Override public float computeNorm(String fieldName, FieldInvertState state) {
+      return state.getBoost();
     }
 
     @Override public float queryNorm(float sumOfSquaredWeights) {
@@ -325,7 +325,7 @@ public class TestPayloadNearQuery extend
       return 1.0f;
     }
     // idf used for phrase queries
-    @Override public IDFExplanation idfExplain(Collection<Term> terms, Searcher searcher) throws IOException {
+    @Override public IDFExplanation idfExplain(Collection<Term> terms, IndexSearcher searcher) throws IOException {
       return new IDFExplanation() {
         @Override
         public float getIdf() {

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java Thu Jan 13 02:09:33 2011
@@ -34,6 +34,7 @@ import org.apache.lucene.analysis.MockTo
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Payload;
 import org.apache.lucene.index.RandomIndexWriter;
@@ -299,8 +300,8 @@ public class TestPayloadTermQuery extend
     //Make everything else 1 so we see the effect of the payload
     //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     @Override
-    public float lengthNorm(String fieldName, int numTerms) {
-      return 1;
+    public float computeNorm(String fieldName, FieldInvertState state) {
+      return state.getBoost();
     }
 
     @Override

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java Thu Jan 13 02:09:33 2011
@@ -21,6 +21,7 @@ import org.apache.lucene.analysis.MockAn
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.SlowMultiReaderWrapper;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
@@ -168,7 +169,8 @@ public class TestNearSpansOrdered extend
   public void testSpanNearScorerSkipTo1() throws Exception {
     SpanNearQuery q = makeQuery();
     Weight w = q.weight(searcher);
-    Scorer s = w.scorer(searcher.getIndexReader(), true, false);
+    assertTrue(searcher.getTopReaderContext().isAtomic);
+    Scorer s = w.scorer((AtomicReaderContext) searcher.getTopReaderContext(), true, false);
     assertEquals(1, s.advance(1));
   }
   /**
@@ -177,7 +179,8 @@ public class TestNearSpansOrdered extend
    */
   public void testSpanNearScorerExplain() throws Exception {
     SpanNearQuery q = makeQuery();
-    Explanation e = q.weight(searcher).explain(searcher.getIndexReader(), 1);
+    assertTrue(searcher.getTopReaderContext().isAtomic);
+    Explanation e = q.weight(searcher).explain((AtomicReaderContext) searcher.getTopReaderContext(), 1);
     assertTrue("Scorer explanation value for doc#1 isn't positive: "
                + e.toString(),
                0.0f < e.getValue());

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java Thu Jan 13 02:09:33 2011
@@ -24,7 +24,6 @@ import org.apache.lucene.index.RandomInd
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.FuzzyQuery;
 import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Searcher;
 import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
@@ -35,7 +34,7 @@ import org.apache.lucene.util.LuceneTest
 public class TestSpanMultiTermQueryWrapper extends LuceneTestCase {
   private Directory directory;
   private IndexReader reader;
-  private Searcher searcher;
+  private IndexSearcher searcher;
   
   @Override
   public void setUp() throws Exception {

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java Thu Jan 13 02:09:33 2011
@@ -18,18 +18,18 @@ package org.apache.lucene.search.spans;
  */
 
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.CheckHits;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.SlowMultiReaderWrapper;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
@@ -416,12 +416,12 @@ public class TestSpans extends LuceneTes
                               slop,
                               ordered) {
       @Override
-      public Similarity getSimilarity(Searcher s) {
+      public Similarity getSimilarity(IndexSearcher s) {
         return sim;
       }
-    };
+      };
 
-    Scorer spanScorer = snq.weight(searcher).scorer(new SlowMultiReaderWrapper(searcher.getIndexReader()), true, false);
+    Scorer spanScorer = snq.weight(searcher).scorer(new AtomicReaderContext(new SlowMultiReaderWrapper(searcher.getIndexReader())), true, false);
 
     assertTrue("first doc", spanScorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
     assertEquals("first doc number", spanScorer.docID(), 11);
@@ -439,7 +439,7 @@ public class TestSpans extends LuceneTes
   }
 
   // LUCENE-1404
-  private int hitCount(Searcher searcher, String word) throws Throwable {
+  private int hitCount(IndexSearcher searcher, String word) throws Throwable {
     return searcher.search(new TermQuery(new Term("text", word)), 10).totalHits;
   }
 

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java Thu Jan 13 02:09:33 2011
@@ -134,7 +134,7 @@ public class TestSpansAdvanced extends L
    * 
    * @throws IOException
    */
-  protected static void assertHits(Searcher s, Query query,
+  protected static void assertHits(IndexSearcher s, Query query,
       final String description, final String[] expectedIds,
       final float[] expectedScores) throws IOException {
     QueryUtils.check(random, query, s);

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java Thu Jan 13 02:09:33 2011
@@ -180,4 +180,22 @@ public class TestRAMDirectory extends Lu
     }
     dir.delete();
   }
+
+  // LUCENE-2852
+  public void testSeekToEOFThenBack() throws Exception {
+    RAMDirectory dir = new RAMDirectory();
+
+    IndexOutput o = dir.createOutput("out");
+    byte[] bytes = new byte[3*RAMInputStream.BUFFER_SIZE];
+    o.writeBytes(bytes, 0, bytes.length);
+    o.close();
+
+    IndexInput i = dir.openInput("out");
+    i.seek(2*RAMInputStream.BUFFER_SIZE-1);
+    i.seek(3*RAMInputStream.BUFFER_SIZE);
+    i.seek(RAMInputStream.BUFFER_SIZE);
+    i.readBytes(bytes, 0, 2*RAMInputStream.BUFFER_SIZE);
+    i.close();
+    dir.close();
+  }
 }

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/LineFileDocs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/LineFileDocs.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/LineFileDocs.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/LineFileDocs.java Thu Jan 13 02:09:33 2011
@@ -18,6 +18,7 @@ package org.apache.lucene.util;
  */
 
 import java.io.Closeable;
+import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.BufferedReader;
@@ -26,6 +27,7 @@ import java.io.InputStream;
 import java.io.BufferedInputStream;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.zip.GZIPInputStream;
+import java.util.Random;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -36,21 +38,19 @@ import org.apache.lucene.document.Field;
 public class LineFileDocs implements Closeable {
 
   private BufferedReader reader;
-  private final boolean forever;
   private final static int BUFFER_SIZE = 1 << 16;     // 64K
   private final AtomicInteger id = new AtomicInteger();
   private final String path;
 
   // If forever is true, we rewind the file at EOF (repeat
   // the docs over and over)
-  public LineFileDocs(String path, boolean forever) throws IOException {
+  public LineFileDocs(Random random, String path) throws IOException {
     this.path = path;
-    this.forever = forever;
-    open();
+    open(random);
   }
 
-  public LineFileDocs(boolean forever) throws IOException {
-    this(LuceneTestCase.TEST_LINE_DOCS_FILE, forever);
+  public LineFileDocs(Random random) throws IOException {
+    this(random, LuceneTestCase.TEST_LINE_DOCS_FILE);
   }
 
   public synchronized void close() throws IOException {
@@ -60,22 +60,49 @@ public class LineFileDocs implements Clo
     }
   }
 
-  private synchronized void open() throws IOException {
+  private synchronized void open(Random random) throws IOException {
     InputStream is = getClass().getResourceAsStream(path);
     if (is == null) {
       // if its not in classpath, we load it as absolute filesystem path (e.g. Hudson's home dir)
       is = new FileInputStream(path);
     }
+    File file = new File(path);
+    long size;
+    if (file.exists()) {
+      size = file.length();
+    } else {
+      size = is.available();
+    }
     if (path.endsWith(".gz")) {
       is = new GZIPInputStream(is);
+      // guesstimate:
+      size *= 2.8;
     }
+
     final InputStream in = new BufferedInputStream(is, BUFFER_SIZE);
     reader = new BufferedReader(new InputStreamReader(in, "UTF-8"), BUFFER_SIZE);
+
+    // Override sizes for currently "known" line files:
+    if (path.equals("europarl.lines.txt.gz")) {
+      size = 15129506L;
+    } else if (path.equals("/home/hudson/lucene-data/enwiki.random.lines.txt.gz")) {
+      size = 3038178822L;
+    }
+
+    // Randomly seek to starting point:
+    if (random != null && size > 3) {
+      final long seekTo = (random.nextLong()&Long.MAX_VALUE) % (size/3);
+      if (LuceneTestCase.VERBOSE) {
+        System.out.println("TEST: LineFileDocs: seek to fp=" + seekTo + " on open");
+      }
+      reader.skip(seekTo);
+      reader.readLine();
+    }
   }
 
-  public synchronized void reset() throws IOException {
+  public synchronized void reset(Random random) throws IOException {
     close();
-    open();
+    open(random);
     id.set(0);
   }
 
@@ -117,15 +144,13 @@ public class LineFileDocs implements Clo
     synchronized(this) {
       line = reader.readLine();
       if (line == null) {
-        if (forever) {
-          if (LuceneTestCase.VERBOSE) {
-            System.out.println("TEST: LineFileDocs: now rewind file...");
-          }
-          close();
-          open();
-          line = reader.readLine();
+        // Always rewind at end:
+        if (LuceneTestCase.VERBOSE) {
+          System.out.println("TEST: LineFileDocs: now rewind file...");
         }
-        return null;
+        close();
+        open(null);
+        line = reader.readLine();
       }
     }
 

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java Thu Jan 13 02:09:33 2011
@@ -42,6 +42,7 @@ import org.apache.lucene.index.codecs.Co
 import org.apache.lucene.index.codecs.mockintblock.MockFixedIntBlockCodec;
 import org.apache.lucene.index.codecs.mockintblock.MockVariableIntBlockCodec;
 import org.apache.lucene.index.codecs.mocksep.MockSepCodec;
+import org.apache.lucene.index.codecs.mockrandom.MockRandomCodec;
 import org.apache.lucene.index.codecs.preflex.PreFlexCodec;
 import org.apache.lucene.index.codecs.preflexrw.PreFlexRWCodec;
 import org.apache.lucene.index.codecs.pulsing.PulsingCodec;
@@ -199,7 +200,7 @@ public abstract class LuceneTestCase ext
   
   private static Map<MockDirectoryWrapper,StackTraceElement[]> stores;
   
-  private static final String[] TEST_CODECS = new String[] {"MockSep", "MockFixedIntBlock", "MockVariableIntBlock"};
+  private static final String[] TEST_CODECS = new String[] {"MockSep", "MockFixedIntBlock", "MockVariableIntBlock", "MockRandom"};
 
   private static void swapCodec(Codec c, CodecProvider cp) {
     Codec prior = null;
@@ -252,6 +253,7 @@ public abstract class LuceneTestCase ext
     swapCodec(new MockFixedIntBlockCodec(codecHasParam && "MockFixedIntBlock".equals(codec) ? codecParam : _TestUtil.nextInt(random, 1, 2000)), cp);
     // baseBlockSize cannot be over 127:
     swapCodec(new MockVariableIntBlockCodec(codecHasParam && "MockVariableIntBlock".equals(codec) ? codecParam : _TestUtil.nextInt(random, 1, 127)), cp);
+    swapCodec(new MockRandomCodec(random), cp);
 
     return cp.lookup(codec);
   }
@@ -268,9 +270,9 @@ public abstract class LuceneTestCase ext
     cp.unregister(cp.lookup("MockSep"));
     cp.unregister(cp.lookup("MockFixedIntBlock"));
     cp.unregister(cp.lookup("MockVariableIntBlock"));
+    cp.unregister(cp.lookup("MockRandom"));
     swapCodec(new PulsingCodec(1), cp);
     cp.setDefaultFieldCodec(savedDefaultCodec);
-
   }
 
   // randomly picks from core and test codecs
@@ -552,9 +554,7 @@ public abstract class LuceneTestCase ext
           
         if (t.isAlive() && 
             !rogueThreads.containsKey(t) && 
-            t != Thread.currentThread() &&
-            // TODO: TimeLimitingCollector starts a thread statically.... WTF?!
-            !t.getName().equals("TimeLimitedCollector timer thread")) {
+            t != Thread.currentThread()) {
           System.err.println("WARNING: " + context  + " left thread running: " + t);
           rogueThreads.put(t, true);
           rogueCount++;

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java Thu Jan 13 02:09:33 2011
@@ -20,13 +20,13 @@ package org.apache.lucene.util.automaton
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.InputStreamReader;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.HashMap;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
@@ -122,8 +122,10 @@ public class TestFSTs extends LuceneTest
   }
 
   public void testBasicFSA() throws IOException {
-    String[] strings = new String[] {"station", "commotion", "elation", "elastic", "plastic", "stop", "ftop", "ftation"};
+    String[] strings = new String[] {"station", "commotion", "elation", "elastic", "plastic", "stop", "ftop", "ftation", "stat"};
+    String[] strings2 = new String[] {"station", "commotion", "elation", "elastic", "plastic", "stop", "ftop", "ftation"};
     IntsRef[] terms = new IntsRef[strings.length];
+    IntsRef[] terms2 = new IntsRef[strings2.length];
     for(int inputMode=0;inputMode<2;inputMode++) {
       if (VERBOSE) {
         System.out.println("TEST: inputMode=" + inputModeToString(inputMode));
@@ -132,6 +134,10 @@ public class TestFSTs extends LuceneTest
       for(int idx=0;idx<strings.length;idx++) {
         terms[idx] = toIntsRef(strings[idx], inputMode);
       }
+      for(int idx=0;idx<strings2.length;idx++) {
+        terms2[idx] = toIntsRef(strings2[idx], inputMode);
+      }
+      Arrays.sort(terms2);
 
       doTest(inputMode, terms);
     
@@ -141,8 +147,8 @@ public class TestFSTs extends LuceneTest
       {
         final Outputs<Object> outputs = NoOutputs.getSingleton();
         final Object NO_OUTPUT = outputs.getNoOutput();      
-        final List<FSTTester.InputOutput<Object>> pairs = new ArrayList<FSTTester.InputOutput<Object>>(terms.length);
-        for(IntsRef term : terms) {
+        final List<FSTTester.InputOutput<Object>> pairs = new ArrayList<FSTTester.InputOutput<Object>>(terms2.length);
+        for(IntsRef term : terms2) {
           pairs.add(new FSTTester.InputOutput<Object>(term, NO_OUTPUT));
         }
         FST<Object> fst = new FSTTester<Object>(random, dir, inputMode, pairs, outputs).doTest(0, 0);
@@ -154,9 +160,9 @@ public class TestFSTs extends LuceneTest
       // FST ord pos int
       {
         final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
-        final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms.length);
-        for(int idx=0;idx<terms.length;idx++) {
-          pairs.add(new FSTTester.InputOutput<Long>(terms[idx], outputs.get(idx)));
+        final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms2.length);
+        for(int idx=0;idx<terms2.length;idx++) {
+          pairs.add(new FSTTester.InputOutput<Long>(terms2[idx], outputs.get(idx)));
         }
         final FST<Long> fst = new FSTTester<Long>(random, dir, inputMode, pairs, outputs).doTest(0, 0);
         assertNotNull(fst);
@@ -168,10 +174,10 @@ public class TestFSTs extends LuceneTest
       {
         final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
         final BytesRef NO_OUTPUT = outputs.getNoOutput();      
-        final List<FSTTester.InputOutput<BytesRef>> pairs = new ArrayList<FSTTester.InputOutput<BytesRef>>(terms.length);
-        for(int idx=0;idx<terms.length;idx++) {
+        final List<FSTTester.InputOutput<BytesRef>> pairs = new ArrayList<FSTTester.InputOutput<BytesRef>>(terms2.length);
+        for(int idx=0;idx<terms2.length;idx++) {
           final BytesRef output = random.nextInt(30) == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx));
-          pairs.add(new FSTTester.InputOutput<BytesRef>(terms[idx], output));
+          pairs.add(new FSTTester.InputOutput<BytesRef>(terms2[idx], output));
         }
         final FST<BytesRef> fst = new FSTTester<BytesRef>(random, dir, inputMode, pairs, outputs).doTest(0, 0);
         assertNotNull(fst);
@@ -322,19 +328,6 @@ public class TestFSTs extends LuceneTest
       }
     }
 
-    private String getRandomString() {
-      final String term;
-      if (random.nextBoolean()) {
-        term = _TestUtil.randomRealisticUnicodeString(random);
-      } else {
-        // we want to mix in limited-alphabet symbols so
-        // we get more sharing of the nodes given how few
-        // terms we are testing...
-        term = simpleRandomString(random);
-      }
-      return term;
-    }
-
     public void doTest() throws IOException {
       // no pruning
       doTest(0, 0);
@@ -346,156 +339,83 @@ public class TestFSTs extends LuceneTest
       doTest(0, _TestUtil.nextInt(random, 1, 1+pairs.size()));
     }
 
-    // NOTE: only copies the stuff this test needs!!
-    private FST.Arc<T> copyArc(FST.Arc<T> arc) {
-      final FST.Arc<T> copy = new FST.Arc<T>();
-      copy.label = arc.label;
-      copy.target = arc.target;
-      copy.output = arc.output;
-      copy.nextFinalOutput = arc.nextFinalOutput;
-      return arc;
-    }
-
     // runs the term, returning the output, or null if term
-    // isn't accepted.  if stopNode is non-null it must be
-    // length 2 int array; stopNode[0] will be the last
-    // matching node (-1 if the term is accepted)
-    // and stopNode[1] will be the length of the
-    // term prefix that matches
-    private T run(FST<T> fst, IntsRef term, int[] stopNode) throws IOException {
-      if (term.length == 0) {
-        final T output = fst.getEmptyOutput();
-        if (stopNode != null) {
-          stopNode[1] = 0;
-          if (output != null) {
-            // accepted
-            stopNode[0] = -1;
-          } else {
-            stopNode[0] = fst.getStartNode();
-          }
+    // isn't accepted.  if prefixLength is non-null it must be
+    // length 1 int array; prefixLength[0] is set to the length
+    // of the term prefix that matches
+    private T run(FST<T> fst, IntsRef term, int[] prefixLength) throws IOException {
+      assert prefixLength == null || prefixLength.length == 1;
+      final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
+      final T NO_OUTPUT = fst.outputs.getNoOutput();
+      T output = NO_OUTPUT;
+
+      for(int i=0;i<=term.length;i++) {
+        final int label;
+        if (i == term.length) {
+          label = FST.END_LABEL;
+        } else {
+          label = term.ints[term.offset+i];
         }
-        return output;
-      }
-
-      final FST.Arc<T> arc = new FST.Arc<T>();
-      int node = fst.getStartNode();
-      int lastNode = -1;
-      T output = fst.outputs.getNoOutput();
-      //System.out.println("match?");
-      for(int i=0;i<term.length;i++) {
-        //System.out.println("  int=" + term.ints[i]);
-        if (!fst.hasArcs(node)) {
-          //System.out.println("    no arcs!");
-          // hit end node before term's end
-          if (stopNode != null) {
-            stopNode[0] = lastNode;
-            stopNode[1] = i-1;
+        //System.out.println("   loop i=" + i + " label=" + label + " output=" + fst.outputs.outputToString(output) + " curArc: target=" + arc.target + " isFinal?=" + arc.isFinal());
+        if (fst.findTargetArc(label, arc, arc) == null) {
+          if (prefixLength != null) {
+            prefixLength[0] = i;
             return output;
           } else {
             return null;
           }
         }
-
-        if (fst.findArc(node, term.ints[term.offset + i], arc) != null) {
-          node = arc.target;
-          //System.out.println("    match final?=" + arc.isFinal());
-          if (arc.output != fst.outputs.getNoOutput()) {
-            output = fst.outputs.add(output, arc.output);
-          }
-        } else if (stopNode != null) {
-          stopNode[0] = node;
-          stopNode[1] = i;
-          return output;
-        } else {
-          //System.out.println("    no match");
-          return null;
-        }
-
-        lastNode = node;
-      }
-
-      if (!arc.isFinal()) {
-        // hit term's end before end node
-        if (stopNode != null) {
-          stopNode[0] = node;
-          stopNode[1] = term.length;
-          return output;
-        } else {
-          return null;
-        }
+        output = fst.outputs.add(output, arc.output);
       }
 
-      if (arc.nextFinalOutput != fst.outputs.getNoOutput()) {
-        output = fst.outputs.add(output, arc.nextFinalOutput);
+      if (prefixLength != null) {
+        prefixLength[0] = term.length;
       }
 
-      if (stopNode != null) {
-        stopNode[0] = -1;
-        stopNode[1] = term.length;
-      }
       return output;
     }
 
     private T randomAcceptedWord(FST<T> fst, IntsRef in) throws IOException {
-      int node = fst.getStartNode();
+      FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
 
-      if (fst.noNodes()) {
-        // degenerate FST: only accepts the empty string
-        assertTrue(fst.getEmptyOutput() != null);
-        in.length = 0;
-        return fst.getEmptyOutput();
-      }
       final List<FST.Arc<T>> arcs = new ArrayList<FST.Arc<T>>();
       in.length = 0;
       in.offset = 0;
-      T output = fst.outputs.getNoOutput();
-      //System.out.println("get random");
+      final T NO_OUTPUT = fst.outputs.getNoOutput();
+      T output = NO_OUTPUT;
+
       while(true) {
         // read all arcs:
-        //System.out.println("  n=" + node);
-        int arcAddress = node;
-        FST.Arc<T> arc = new FST.Arc<T>();
-        fst.readFirstArc(arcAddress, arc);
-        arcs.add(copyArc(arc));
+        fst.readFirstTargetArc(arc, arc);
+        arcs.add(new FST.Arc<T>().copyFrom(arc));
         while(!arc.isLast()) {
           fst.readNextArc(arc);
-          arcs.add(copyArc(arc));
+          arcs.add(new FST.Arc<T>().copyFrom(arc));
         }
       
         // pick one
         arc = arcs.get(random.nextInt(arcs.size()));
-
         arcs.clear();
 
+        // accumulate output
+        output = fst.outputs.add(output, arc.output);
+
         // append label
+        if (arc.label == FST.END_LABEL) {
+          break;
+        }
+
         if (in.ints.length == in.length) {
           in.grow(1+in.length);
         }
         in.ints[in.length++] = arc.label;
-
-        output = fst.outputs.add(output, arc.output);
-
-        // maybe stop
-        if (arc.isFinal()) {
-          if (fst.hasArcs(arc.target)) {
-            // final state but it also has outgoing edges
-            if (random.nextBoolean()) {
-              output = fst.outputs.add(output, arc.nextFinalOutput);
-              break;
-            }
-          } else {
-            break;
-          }
-        }
-
-        node = arc.target;
       }
 
       return output;
     }
 
 
-    private FST<T> doTest(int prune1, int prune2) throws IOException {
+    FST<T> doTest(int prune1, int prune2) throws IOException {
       if (VERBOSE) {
         System.out.println("TEST: prune1=" + prune1 + " prune2=" + prune2);
       }
@@ -524,7 +444,7 @@ public class TestFSTs extends LuceneTest
 
       if (VERBOSE && pairs.size() <= 20 && fst != null) {
         PrintStream ps = new PrintStream("out.dot");
-        fst.toDot(ps);
+        Util.toDot(fst, ps);
         ps.close();
         System.out.println("SAVED out.dot");
       }
@@ -566,11 +486,19 @@ public class TestFSTs extends LuceneTest
 
       assertNotNull(fst);
 
-      // make sure all words are accepted
+      // visit valid pairs in order -- make sure all words
+      // are accepted, and FSTEnum's next() steps through
+      // them correctly
+      if (VERBOSE) {
+        System.out.println("TEST: check valid terms/next()");
+      }
       {
         IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
         for(InputOutput<T> pair : pairs) {
           IntsRef term = pair.input;
+          if (VERBOSE) {
+            System.out.println("TEST: check term=" + inputToString(inputMode, term) + " output=" + fst.outputs.outputToString(pair.output));
+          }
           Object output = run(fst, term, null);
 
           assertNotNull("term " + inputToString(inputMode, term) + " is not accepted", output);
@@ -578,8 +506,8 @@ public class TestFSTs extends LuceneTest
 
           // verify enum's next
           IntsRefFSTEnum.InputOutput<T> t = fstEnum.next();
-
-          assertEquals(term, t.input);
+          assertNotNull(t);
+          assertEquals("expected input=" + inputToString(inputMode, term) + " but fstEnum returned " + inputToString(inputMode, t.input), term, t.input);
           assertEquals(pair.output, t.output);
         }
         assertNull(fstEnum.next());
@@ -591,6 +519,9 @@ public class TestFSTs extends LuceneTest
       }
 
       // find random matching word and make sure it's valid
+      if (VERBOSE) {
+        System.out.println("TEST: verify random accepted terms");
+      }
       final IntsRef scratch = new IntsRef(10);
       for(int iter=0;iter<500*RANDOM_MULTIPLIER;iter++) {
         T output = randomAcceptedWord(fst, scratch);
@@ -598,10 +529,15 @@ public class TestFSTs extends LuceneTest
         assertEquals(termsMap.get(scratch), output);
       }
     
-      // test single IntsRefFSTEnum.advance:
-      //System.out.println("TEST: verify advance");
+      // test IntsRefFSTEnum.seek:
+      if (VERBOSE) {
+        System.out.println("TEST: verify seek");
+      }
+      IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
       for(int iter=0;iter<100*RANDOM_MULTIPLIER;iter++) {
-        final IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
+        if (VERBOSE) {
+          System.out.println("TEST: iter=" + iter);
+        }
         if (random.nextBoolean()) {
           // seek to term that doesn't exist:
           while(true) {
@@ -611,15 +547,35 @@ public class TestFSTs extends LuceneTest
               pos = -(pos+1);
               // ok doesn't exist
               //System.out.println("  seek " + inputToString(inputMode, term));
-              final IntsRefFSTEnum.InputOutput<T> seekResult = fstEnum.advance(term);
-              if (pos < pairs.size()) {
+              final IntsRefFSTEnum.InputOutput<T> seekResult;
+              if (random.nextBoolean()) {
+                if (VERBOSE) {
+                  System.out.println("  do non-exist seekFloor term=" + inputToString(inputMode, term));
+                }
+                seekResult = fstEnum.seekFloor(term);
+                pos--;
+              } else {
+                if (VERBOSE) {
+                  System.out.println("  do non-exist seekCeil term=" + inputToString(inputMode, term));
+                }
+                seekResult = fstEnum.seekCeil(term);
+              }
+
+              if (pos != -1 && pos < pairs.size()) {
                 //System.out.println("    got " + inputToString(inputMode,seekResult.input) + " output=" + fst.outputs.outputToString(seekResult.output));
-                assertEquals(pairs.get(pos).input, seekResult.input);
+                assertNotNull("got null but expected term=" + inputToString(inputMode, pairs.get(pos).input), seekResult);
+                if (VERBOSE) {
+                  System.out.println("    got " + inputToString(inputMode, seekResult.input));
+                }
+                assertEquals("expected " + inputToString(inputMode, pairs.get(pos).input) + " but got " + inputToString(inputMode, seekResult.input), pairs.get(pos).input, seekResult.input);
                 assertEquals(pairs.get(pos).output, seekResult.output);
               } else {
-                // seeked beyond end
+                // seeked before start or beyond end
                 //System.out.println("seek=" + seekTerm);
                 assertNull("expected null but got " + (seekResult==null ? "null" : inputToString(inputMode, seekResult.input)), seekResult);
+                if (VERBOSE) {
+                  System.out.println("    got null");
+                }
               }
 
               break;
@@ -627,24 +583,36 @@ public class TestFSTs extends LuceneTest
           }
         } else {
           // seek to term that does exist:
-          InputOutput pair = pairs.get(random.nextInt(pairs.size()));
-          //System.out.println("  seek " + inputToString(inputMode, pair.input));
-          final IntsRefFSTEnum.InputOutput<T> seekResult = fstEnum.advance(pair.input);
-          assertEquals(pair.input, seekResult.input);
+          InputOutput<T> pair = pairs.get(random.nextInt(pairs.size()));
+          final IntsRefFSTEnum.InputOutput<T> seekResult;
+          if (random.nextBoolean()) {
+            if (VERBOSE) {
+              System.out.println("  do exists seekFloor " + inputToString(inputMode, pair.input));
+            }
+            seekResult = fstEnum.seekFloor(pair.input);
+          } else {
+            if (VERBOSE) {
+              System.out.println("  do exists seekCeil " + inputToString(inputMode, pair.input));
+            }
+            seekResult = fstEnum.seekCeil(pair.input);
+          }
+          assertNotNull(seekResult);
+          assertEquals("got " + inputToString(inputMode, seekResult.input) + " but expected " + inputToString(inputMode, pair.input), pair.input, seekResult.input);
           assertEquals(pair.output, seekResult.output);
         }
       }
 
       if (VERBOSE) {
-        System.out.println("TEST: mixed next/advance");
+        System.out.println("TEST: mixed next/seek");
       }
 
-      // test mixed next/advance
+      // test mixed next/seek
       for(int iter=0;iter<100*RANDOM_MULTIPLIER;iter++) {
         if (VERBOSE) {
           System.out.println("TEST: iter " + iter);
         }
-        final IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
+        // reset:
+        fstEnum = new IntsRefFSTEnum<T>(fst);
         int upto = -1;
         while(true) {
           boolean isDone = false;
@@ -660,13 +628,24 @@ public class TestFSTs extends LuceneTest
             for(;attempt<10;attempt++) {
               IntsRef term = toIntsRef(getRandomString(), inputMode);
               if (!termsMap.containsKey(term) && term.compareTo(pairs.get(upto).input) > 0) {
-                if (VERBOSE) {
-                  System.out.println("  do non-exist advance(" + inputToString(inputMode, term) + "]");
-                }
                 int pos = Collections.binarySearch(pairs, new InputOutput<T>(term, null));
                 assert pos < 0;
                 upto = -(pos+1);
-                isDone = fstEnum.advance(term) == null;
+
+                if (random.nextBoolean()) {
+                  upto--;
+                  assertTrue(upto != -1);
+                  if (VERBOSE) {
+                    System.out.println("  do non-exist seekFloor(" + inputToString(inputMode, term) + ")");
+                  }
+                  isDone = fstEnum.seekFloor(term) == null;
+                } else {
+                  if (VERBOSE) {
+                    System.out.println("  do non-exist seekCeil(" + inputToString(inputMode, term) + ")");
+                  }
+                  isDone = fstEnum.seekCeil(term) == null;
+                }
+
                 break;
               }
             }
@@ -681,10 +660,17 @@ public class TestFSTs extends LuceneTest
               upto = 0;
             }
 
-            if (VERBOSE) {
-              System.out.println("  do advance(" + inputToString(inputMode, pairs.get(upto).input) + "]");
+            if (random.nextBoolean()) {
+              if (VERBOSE) {
+                System.out.println("  do advanceCeil(" + inputToString(inputMode, pairs.get(upto).input) + ")");
+              }
+              isDone = fstEnum.seekCeil(pairs.get(upto).input) == null;
+            } else {
+              if (VERBOSE) {
+                System.out.println("  do advanceFloor(" + inputToString(inputMode, pairs.get(upto).input) + ")");
+              }
+              isDone = fstEnum.seekFloor(pairs.get(upto).input) == null;
             }
-            isDone = fstEnum.advance(pairs.get(upto).input) == null;
           }
           if (VERBOSE) {
             if (!isDone) {
@@ -701,6 +687,24 @@ public class TestFSTs extends LuceneTest
             assertFalse(isDone);
             assertEquals(pairs.get(upto).input, fstEnum.current().input);
             assertEquals(pairs.get(upto).output, fstEnum.current().output);
+
+            /*
+            if (upto < pairs.size()-1) {
+              int tryCount = 0;
+              while(tryCount < 10) {
+                final IntsRef t = toIntsRef(getRandomString(), inputMode);
+                if (pairs.get(upto).input.compareTo(t) < 0) {
+                  final boolean expected = t.compareTo(pairs.get(upto+1).input) < 0;
+                  if (VERBOSE) {
+                    System.out.println("TEST: call beforeNext(" + inputToString(inputMode, t) + "); current=" + inputToString(inputMode, pairs.get(upto).input) + " next=" + inputToString(inputMode, pairs.get(upto+1).input) + " expected=" + expected);
+                  }
+                  assertEquals(expected, fstEnum.beforeNext(t));
+                  break;
+                }
+                tryCount++;
+              }
+            }
+            */
           }
         }
       }
@@ -757,7 +761,9 @@ public class TestFSTs extends LuceneTest
         }
       }
 
-      //System.out.println("TEST: now prune");
+      if (VERBOSE) {
+        System.out.println("TEST: now prune");
+      }
 
       // prune 'em
       final Iterator<Map.Entry<IntsRef,CountMinOutput<T>>> it = prefixes.entrySet().iterator();
@@ -765,7 +771,9 @@ public class TestFSTs extends LuceneTest
         Map.Entry<IntsRef,CountMinOutput<T>> ent = it.next();
         final IntsRef prefix = ent.getKey();
         final CountMinOutput<T> cmo = ent.getValue();
-        //System.out.println("  term=" + inputToString(inputMode, prefix) + " count=" + cmo.count + " isLeaf=" + cmo.isLeaf);
+        if (VERBOSE) {
+          System.out.println("  term=" + inputToString(inputMode, prefix) + " count=" + cmo.count + " isLeaf=" + cmo.isLeaf + " output=" + outputs.outputToString(cmo.output) + " isFinal=" + cmo.isFinal);
+        }
         final boolean keep;
         if (prune1 > 0) {
           keep = cmo.count >= prune1;
@@ -824,14 +832,20 @@ public class TestFSTs extends LuceneTest
       assertNotNull(fst);
 
       // make sure FST only enums valid prefixes
+      if (VERBOSE) {
+        System.out.println("TEST: check pruned enum");
+      }
       IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
-      IntsRefFSTEnum.InputOutput current;
+      IntsRefFSTEnum.InputOutput<T> current;
       while((current = fstEnum.next()) != null) {
-        //System.out.println("  fst enum term=" + inputToString(inputMode, current.input) + " output=" + outputs.outputToString(current.output));
+        if (VERBOSE) {
+          System.out.println("  fstEnum.next term=" + inputToString(inputMode, current.input) + " output=" + outputs.outputToString(current.output));
+        }
         final CountMinOutput cmo = prefixes.get(current.input);
         assertNotNull(cmo);
         assertTrue(cmo.isLeaf || cmo.isFinal);
-        if (cmo.isFinal && !cmo.isLeaf) {
+        //if (cmo.isFinal && !cmo.isLeaf) {
+        if (cmo.isFinal) {
           assertEquals(cmo.finalOutput, current.output);
         } else {
           assertEquals(cmo.output, current.output);
@@ -839,19 +853,24 @@ public class TestFSTs extends LuceneTest
       }
 
       // make sure all non-pruned prefixes are present in the FST
-      final int[] stopNode = new int[2];
+      if (VERBOSE) {
+        System.out.println("TEST: verify all prefixes");
+      }
+      final int[] stopNode = new int[1];
       for(Map.Entry<IntsRef,CountMinOutput<T>> ent : prefixes.entrySet()) {
         if (ent.getKey().length > 0) {
           final CountMinOutput<T> cmo = ent.getValue();
           final T output = run(fst, ent.getKey(), stopNode);
-          //System.out.println("  term=" + inputToString(inputMode, ent.getKey()) + " output=" + outputs.outputToString(cmo.output));
+          if (VERBOSE) {
+            System.out.println("TEST: verify term=" + inputToString(inputMode, ent.getKey()) + " output=" + outputs.outputToString(cmo.output));
+          }
           // if (cmo.isFinal && !cmo.isLeaf) {
           if (cmo.isFinal) {
             assertEquals(cmo.finalOutput, output);
           } else {
             assertEquals(cmo.output, output);
           }
-          assertEquals(ent.getKey().length, stopNode[1]);
+          assertEquals(ent.getKey().length, stopNode[0]);
         }
       }
     }
@@ -859,7 +878,7 @@ public class TestFSTs extends LuceneTest
 
   public void testRandomWords() throws IOException {
     testRandomWords(1000, 5 * RANDOM_MULTIPLIER);
-    //testRandomWords(10, 100);
+    //testRandomWords(20, 100);
   }
 
   private String inputModeToString(int mode) {
@@ -888,7 +907,7 @@ public class TestFSTs extends LuceneTest
     }
   }
 
-  private String getRandomString() {
+  static String getRandomString() {
     final String term;
     if (random.nextBoolean()) {
       term = _TestUtil.randomRealisticUnicodeString(random);
@@ -909,10 +928,10 @@ public class TestFSTs extends LuceneTest
   private static String inputToString(int inputMode, IntsRef term) {
     if (inputMode == 0) {
       // utf8
-      return toBytesRef(term).utf8ToString();
+      return toBytesRef(term).utf8ToString() + " " + term;
     } else {
       // utf32
-      return UnicodeUtil.newString(term.ints, term.offset, term.length);
+      return UnicodeUtil.newString(term.ints, term.offset, term.length) + " " + term;
     }
   }
 
@@ -925,12 +944,13 @@ public class TestFSTs extends LuceneTest
       CodecProvider.getDefault().setDefaultFieldCodec("Standard");
     }
 
-    final LineFileDocs docs = new LineFileDocs(false);
+    final LineFileDocs docs = new LineFileDocs(random);
     final int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 100 : 1;
     final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
     final File tempDir = _TestUtil.getTempDir("fstlines");
     final MockDirectoryWrapper dir = new MockDirectoryWrapper(random, FSDirectory.open(tempDir));
     final IndexWriter writer = new IndexWriter(dir, conf);
+    writer.setInfoStream(VERBOSE ? System.out : null);
     final long stopTime = System.currentTimeMillis() + RUN_TIME_SEC * 1000;
     Document doc;
     int docCount = 0;
@@ -986,18 +1006,17 @@ public class TestFSTs extends LuceneTest
         // same:
         final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
         for(int iter=0;iter<1000*RANDOM_MULTIPLIER;iter++) {
-          fstEnum.reset();
           final BytesRef randomTerm = new BytesRef(getRandomString());
         
-          final TermsEnum.SeekStatus seekResult = termsEnum.seek(randomTerm);
-          final BytesRefFSTEnum.InputOutput fstSeekResult = fstEnum.advance(randomTerm);
-
           if (VERBOSE) {
-            System.out.println("TEST: seek " + randomTerm.utf8ToString());
+            System.out.println("TEST: seek " + randomTerm.utf8ToString() + " " + randomTerm);
           }
 
+          final TermsEnum.SeekStatus seekResult = termsEnum.seek(randomTerm);
+          final BytesRefFSTEnum.InputOutput fstSeekResult = fstEnum.seekCeil(randomTerm);
+
           if (seekResult == TermsEnum.SeekStatus.END) {
-            assertNull(fstSeekResult);
+            assertNull("got " + (fstSeekResult == null ? "null" : fstSeekResult.input.utf8ToString()) + " but expected null", fstSeekResult);
           } else {
             assertSame(termsEnum, fstEnum, storeOrd);
             for(int nextIter=0;nextIter<10;nextIter++) {
@@ -1011,6 +1030,9 @@ public class TestFSTs extends LuceneTest
                 assertNotNull(fstEnum.next());
                 assertSame(termsEnum, fstEnum, storeOrd);
               } else {
+                if (VERBOSE) {
+                  System.out.println("  end!");
+                }
                 BytesRefFSTEnum.InputOutput<Long> nextResult = fstEnum.next();
                 if (nextResult != null) {
                   System.out.println("expected null but got: input=" + nextResult.input.utf8ToString() + " output=" + outputs.outputToString(nextResult.output));
@@ -1032,7 +1054,8 @@ public class TestFSTs extends LuceneTest
     if (termsEnum.term() == null) {
       assertNull(fstEnum.current());
     } else {
-      assertEquals(termsEnum.term(), fstEnum.current().input);
+      assertNotNull(fstEnum.current());
+      assertEquals(termsEnum.term().utf8ToString() + " != " + fstEnum.current().input.utf8ToString(), termsEnum.term(), fstEnum.current().input);
       if (storeOrd) {
         // fst stored the ord
         assertEquals(termsEnum.ord(), ((Long) fstEnum.current().output).longValue());
@@ -1095,7 +1118,7 @@ public class TestFSTs extends LuceneTest
         System.out.println(ord + " terms; " + fst.getNodeCount() + " nodes; " + fst.getArcCount() + " arcs; " + fst.getArcWithOutputCount() + " arcs w/ output; tot size " + fst.sizeInBytes());
         if (fst.getNodeCount() < 100) {
           PrintStream ps = new PrintStream("out.dot");
-          fst.toDot(ps);
+          Util.toDot(fst, ps);
           ps.close();
           System.out.println("Wrote FST to out.dot");
         }
@@ -1121,7 +1144,7 @@ public class TestFSTs extends LuceneTest
           }
           toIntsRef(w, inputMode, intsRef);
           T expected = getOutput(intsRef, ord);
-          T actual = fst.get(intsRef);
+          T actual = Util.get(fst, intsRef);
           if (actual == null) {
             throw new RuntimeException("unexpected null output on input=" + w);
           }
@@ -1233,4 +1256,57 @@ public class TestFSTs extends LuceneTest
       }.run(limit);
     }
   }
+
+  public void testSingleString() throws Exception {
+    final Outputs<Object> outputs = NoOutputs.getSingleton();
+    final Builder<Object> b = new Builder<Object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, outputs);
+    b.add(new BytesRef("foobar"), outputs.getNoOutput());
+    final BytesRefFSTEnum<Object> fstEnum = new BytesRefFSTEnum<Object>(b.finish());
+    assertNull(fstEnum.seekFloor(new BytesRef("foo")));
+    assertNull(fstEnum.seekCeil(new BytesRef("foobaz")));
+  }
+
+  public void testSimple() throws Exception {
+
+    // Get outputs -- passing true means FST will share
+    // (delta code) the outputs.  This should result in
+    // smaller FST if the outputs grow monotonically.  But
+    // if numbers are "random", false should give smaller
+    // final size:
+    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+
+    // Build an FST mapping BytesRef -> Long
+    final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, outputs);
+
+    final BytesRef a = new BytesRef("a");
+    final BytesRef b = new BytesRef("b");
+    final BytesRef c = new BytesRef("c");
+
+    builder.add(a, outputs.get(17));
+    builder.add(b, outputs.get(42));
+    builder.add(c, outputs.get(13824324872317238L));
+
+    final FST<Long> fst = builder.finish();
+
+    assertEquals(13824324872317238L, (long) Util.get(fst, c));
+    assertEquals(42, (long) Util.get(fst, b));
+    assertEquals(17, (long) Util.get(fst, a));
+
+    BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
+    BytesRefFSTEnum.InputOutput<Long> seekResult;
+    seekResult = fstEnum.seekFloor(a);
+    assertNotNull(seekResult);
+    assertEquals(17, (long) seekResult.output);
+
+    // goes to a
+    seekResult = fstEnum.seekFloor(new BytesRef("aa"));
+    assertNotNull(seekResult);
+    assertEquals(17, (long) seekResult.output);
+
+    // goes to b
+    seekResult = fstEnum.seekCeil(new BytesRef("aa"));
+    assertNotNull(seekResult);
+    assertEquals(b, seekResult.input);
+    assertEquals(42, (long) seekResult.output);
+  }
 }

Modified: lucene/dev/branches/bulkpostings/modules/analysis/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/CHANGES.txt?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/CHANGES.txt (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/CHANGES.txt Thu Jan 13 02:09:33 2011
@@ -9,12 +9,14 @@ API Changes
 
  * LUCENE-2413: Removed the AnalyzerUtil in common/miscellaneous.  (Robert Muir)
 
- * LUCENE-2167,LUCENE-2699,LUCENE-2763: StandardTokenizer/Analyzer in 
-   common/standard/ now implement the Word Break rules from the Unicode 6.0.0
-   Text Segmentation algorithm (UAX#29).  
+ * LUCENE-2167,LUCENE-2699,LUCENE-2763,LUCENE-2847: StandardTokenizer/Analyzer
+   in common/standard/ now implement the Word Break rules from the Unicode 6.0.0
+   Text Segmentation algorithm (UAX#29), covering the full range of Unicode code
+   points, including values from U+10000 to U+10FFFF.
    
-   ClassicTokenizer/Analyzer retains the old StandardTokenizer/Analyzer
-   implementation and behavior.
+   ClassicTokenizer/Analyzer retains the old (pre-Lucene 3.1) StandardTokenizer/
+   Analyzer implementation and behavior.  Only the Unicode Basic Multilingual
+   Plane (code points from U+0000 to U+FFFF) is covered.
 
    UAX29URLEmailTokenizer tokenizes URLs and E-mail addresses according to the
    relevant RFCs, in addition to implementing the UAX#29 Word Break rules.

Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/build.xml?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/build.xml (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/build.xml Thu Jan 13 02:09:33 2011
@@ -38,8 +38,15 @@
 
   <target name="compile-core" depends="jflex-notice, common.compile-core"/>
 
-  <target name="jflex" depends="jflex-check,clean-jflex,jflex-StandardAnalyzer,jflex-UAX29URLEmailTokenizer,jflex-wiki-tokenizer"/>
+  <target name="jflex" depends="jflex-check,clean-jflex,gen-uax29-supp-macros,
+                                jflex-StandardAnalyzer,jflex-UAX29URLEmailTokenizer,jflex-wiki-tokenizer"/>
 
+  <target name="gen-uax29-supp-macros">
+    <subant target="gen-uax29-supp-macros">
+       <fileset dir="../icu" includes="build.xml"/>
+    </subant>
+  </target>
+  
   <target name="jflex-wiki-tokenizer" depends="init,jflex-check" if="jflex.present">
     <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
       <classpath refid="jflex.classpath"/>

Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java Thu Jan 13 02:09:33 2011
@@ -1,10 +1,5 @@
 package org.apache.lucene.analysis.pt;
 
-import java.util.Arrays;
-
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -31,89 +26,14 @@ import org.apache.lucene.util.Version;
  * which is just the plural reduction step of the RSLP
  * algorithm from <i>A Stemming Algorithm for the Portuguese Language</i>,
  * Orengo et al.
+ * @see RSLPStemmerBase
  */
-public class PortugueseMinimalStemmer {
+public class PortugueseMinimalStemmer extends RSLPStemmerBase {
   
-  private static final CharArraySet excIS = new CharArraySet(Version.LUCENE_31,
-      Arrays.asList("lápis", "cais", "mais", "crúcis", "biquínis", "pois", 
-          "depois","dois","leis"),
-      false);
-  
-  private static final CharArraySet excS = new CharArraySet(Version.LUCENE_31,
-      Arrays.asList("aliás", "pires", "lápis", "cais", "mais", "mas", "menos",
-          "férias", "fezes", "pêsames", "crúcis", "gás", "atrás", "moisés",
-          "através", "convés", "ês", "país", "após", "ambas", "ambos",
-          "messias", "depois"), 
-      false);
+  private static final Step pluralStep = 
+    parse(PortugueseMinimalStemmer.class, "portuguese.rslp").get("Plural");
   
   public int stem(char s[], int len) {
-    if (len < 3 || s[len-1] != 's')
-      return len;
-    
-    if (s[len-2] == 'n') {
-      len--;
-      s[len-1] = 'm';
-      return len;
-    }
-    
-    if (len >= 6 && s[len-3] == 'õ' && s[len-2] == 'e') {
-      len--;
-      s[len-2] = 'ã';
-      s[len-1] = 'o';
-      return len;
-    }
-      
-    if (len >= 4 && s[len-3] == 'ã' && s[len-2] == 'e')
-      if (!(len == 4 && s[0] == 'm')) {
-        len--;
-        s[len-1] = 'o';
-        return len;
-      }
-    
-    if (len >= 4 && s[len-2] == 'i') {
-      if (s[len-3] == 'a')
-        if (!(len == 4 && (s[0] == 'c' || s[0] == 'm'))) {
-          len--;
-          s[len-1] = 'l';
-          return len;
-        }
-   
-      if (len >= 5 && s[len-3] == 'é') {
-        len--;
-        s[len-2] = 'e';
-        s[len-1] = 'l';
-        return len;
-      }
-    
-      if (len >= 5 && s[len-3] == 'e') {
-        len--;
-        s[len-1] = 'l';
-        return len;
-      }
-    
-      if (len >= 5 && s[len-3] == 'ó') {
-        len--;
-        s[len-2] = 'o';
-        s[len-1] = 'l';
-        return len;
-      }
-  
-      if (!excIS.contains(s, 0, len)) {
-        s[len-1] = 'l';
-        return len;
-      }
-    }
-    
-    if (len >= 6 && s[len-3] == 'l' && s[len-2] == 'e')
-      return len - 2;
-    
-    if (len >= 6 && s[len-3] == 'r' && s[len-2] == 'e')
-      if (!(len == 7 && s[0] == 'á' && s[1] == 'r' && s[2] == 'v' && s[3] == 'o'))
-        return len - 2;
-      
-    if (excS.contains(s, 0, len))
-      return len;
-    else
-      return len-1;
+    return pluralStep.apply(s, len);
   }
 }

Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro Thu Jan 13 02:09:33 2011
@@ -15,8 +15,8 @@
  */
 
 // Generated from IANA Root Zone Database <http://www.internic.net/zones/root.zone>
-// file version from Saturday, December 4, 2010 12:34:19 PM UTC
-// generated on Sunday, December 5, 2010 12:24:12 AM UTC
+// file version from Wednesday, January 5, 2011 12:34:09 PM UTC
+// generated on Thursday, January 6, 2011 5:09:41 AM UTC
 // by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros
 
 ASCIITLD = "." (
@@ -306,6 +306,7 @@ ASCIITLD = "." (
 	| [xX][nN]--[pP]1[aA][iI]
 	| [xX][nN]--[pP][gG][bB][sS]0[dD][hH]
 	| [xX][nN]--[wW][gG][bB][hH]1[cC]
+	| [xX][nN]--[wW][gG][bB][lL]6[aA]
 	| [xX][nN]--[xX][kK][cC]2[aA][lL]3[hH][yY][eE]2[aA]
 	| [xX][nN]--[yY][gG][bB][iI]2[aA][mM][mM][xX]
 	| [xX][nN]--[zZ][cC][kK][zZ][aA][hH]



Mime
View raw message