lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r1141593 - in /lucene/dev/trunk: lucene/src/java/org/apache/lucene/index/ lucene/src/test/org/apache/lucene/index/ lucene/src/test/org/apache/lucene/util/fst/ modules/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/
Date Thu, 30 Jun 2011 16:05:42 GMT
Author: mikemccand
Date: Thu Jun 30 16:05:42 2011
New Revision: 1141593

URL: http://svn.apache.org/viewvc?rev=1141593&view=rev
Log:
LUCENE-3260: fix wrong result from MultiTermsEnum.next() after seekExact

Added:
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestTermsEnum.java   (with props)
Modified:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java
    lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/LuceneTaxonomyWriter.java

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=1141593&r1=1141592&r2=1141593&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java Thu Jun 30 16:05:42 2011
@@ -43,6 +43,7 @@ public final class MultiTermsEnum extend
   private final MultiDocsAndPositionsEnum.EnumWithSlice[] subDocsAndPositions;
 
   private BytesRef lastSeek;
+  private boolean lastSeekExact;
   private final BytesRef lastSeekScratch = new BytesRef();
 
   private int numTop;
@@ -149,6 +150,7 @@ public final class MultiTermsEnum extend
     }
 
     lastSeek = null;
+    lastSeekExact = true;
 
     for(int i=0;i<numSubs;i++) {
       final boolean status;
@@ -179,6 +181,7 @@ public final class MultiTermsEnum extend
       if (status) {
         top[numTop++] = currentSubs[i];
         current = currentSubs[i].current = currentSubs[i].terms.term();
+        assert term.equals(currentSubs[i].current);
       }
     }
 
@@ -191,6 +194,7 @@ public final class MultiTermsEnum extend
   public SeekStatus seekCeil(BytesRef term, boolean useCache) throws IOException {
     queue.clear();
     numTop = 0;
+    lastSeekExact = false;
 
     boolean seekOpt = false;
     if (lastSeek != null && termComp.compare(lastSeek, term) <= 0) {
@@ -293,6 +297,17 @@ public final class MultiTermsEnum extend
 
   @Override
   public BytesRef next() throws IOException {
+    if (lastSeekExact) {
+      // Must seekCeil at this point, so those subs that
+      // didn't have the term can find the following term.
+      // NOTE: we could save some CPU by only seekCeil the
+      // subs that didn't match the last exact seek... but
+      // most impls short-circuit if you seekCeil to term
+      // they are already on.
+      final SeekStatus status = seekCeil(current);
+      assert status == SeekStatus.FOUND;
+      lastSeekExact = false;
+    }
     lastSeek = null;
 
     // restore queue

Added: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestTermsEnum.java?rev=1141593&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestTermsEnum.java (added)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestTermsEnum.java Thu Jun 30 16:05:42 2011
@@ -0,0 +1,143 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LineFileDocs;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+
+public class TestTermsEnum extends LuceneTestCase {
+
+  public void test() throws Exception {
+    final LineFileDocs docs = new LineFileDocs(random);
+    final Directory d = newDirectory();
+    final RandomIndexWriter w = new RandomIndexWriter(random, d);
+    final int numDocs = atLeast(10);
+    for(int docCount=0;docCount<numDocs;docCount++) {
+      w.addDocument(docs.nextDoc());
+    }
+    final IndexReader r = w.getReader();
+    w.close();
+
+    final List<BytesRef> terms = new ArrayList<BytesRef>();
+    final TermsEnum termsEnum = MultiFields.getTerms(r, "body").iterator();
+    BytesRef term;
+    while((term = termsEnum.next()) != null) {
+      terms.add(new BytesRef(term));
+    }
+    if (VERBOSE) {
+      System.out.println("TEST: " + terms.size() + " terms");
+    }
+
+    int upto = -1;
+    final int iters = atLeast(200);
+    for(int iter=0;iter<iters;iter++) {
+      final boolean isEnd;
+      if (upto != -1 && random.nextBoolean()) {
+        // next
+        if (VERBOSE) {
+          System.out.println("TEST: iter next");
+        }
+        isEnd = termsEnum.next() == null;
+        upto++;
+        if (isEnd) {
+          if (VERBOSE) {
+            System.out.println("  end");
+          }
+          assertEquals(upto, terms.size());
+          upto = -1;
+        } else {
+          if (VERBOSE) {
+            System.out.println("  got term=" + termsEnum.term().utf8ToString() + " expected=" + terms.get(upto).utf8ToString());
+          }
+          assertTrue(upto < terms.size());
+          assertEquals(terms.get(upto), termsEnum.term());
+        }
+      } else {
+
+        final BytesRef target;
+        final String exists;
+        if (random.nextBoolean()) {
+          // likely fake term
+          if (random.nextBoolean()) {
+            target = new BytesRef(_TestUtil.randomSimpleString(random));
+          } else {
+            target = new BytesRef(_TestUtil.randomRealisticUnicodeString(random));
+          }
+          exists = "likely not";
+        } else {
+          // real term
+          target = terms.get(random.nextInt(terms.size()));
+          exists = "yes";
+        }
+
+        upto = Collections.binarySearch(terms, target);
+
+        if (random.nextBoolean()) {
+          if (VERBOSE) {
+            System.out.println("TEST: iter seekCeil target=" + target.utf8ToString() + " exists=" + exists);
+          }
+          // seekCeil
+          final TermsEnum.SeekStatus status = termsEnum.seekCeil(target, random.nextBoolean());
+          if (VERBOSE) {
+            System.out.println("  got " + status);
+          }
+          
+          if (upto < 0) {
+            upto = -(upto+1);
+            if (upto >= terms.size()) {
+              assertEquals(TermsEnum.SeekStatus.END, status);
+              upto = -1;
+            } else {
+              assertEquals(TermsEnum.SeekStatus.NOT_FOUND, status);
+              assertEquals(terms.get(upto), termsEnum.term());
+            }
+          } else {
+            assertEquals(TermsEnum.SeekStatus.FOUND, status);
+            assertEquals(terms.get(upto), termsEnum.term());
+          }
+        } else {
+          if (VERBOSE) {
+            System.out.println("TEST: iter seekExact target=" + target.utf8ToString() + " exists=" + exists);
+          }
+          // seekExact
+          final boolean result = termsEnum.seekExact(target, false);
+          if (VERBOSE) {
+            System.out.println("  got " + result);
+          }
+          if (upto < 0) {
+            assertFalse(result);
+            upto = -1;
+          } else {
+            assertTrue(result);
+            assertEquals(target, termsEnum.term());
+          }
+        }
+      }
+    }
+
+    r.close();
+    d.close();
+  }
+}

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java?rev=1141593&r1=1141592&r2=1141593&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java Thu Jun 30 16:05:42 2011
@@ -714,12 +714,12 @@ public class TestFSTs extends LuceneTest
 
             if (random.nextBoolean()) {
               if (VERBOSE) {
-                System.out.println("  do advanceCeil(" + inputToString(inputMode, pairs.get(upto).input) + ")");
+                System.out.println("  do seekCeil(" + inputToString(inputMode, pairs.get(upto).input) + ")");
               }
               isDone = fstEnum.seekCeil(pairs.get(upto).input) == null;
             } else {
               if (VERBOSE) {
-                System.out.println("  do advanceFloor(" + inputToString(inputMode, pairs.get(upto).input) + ")");
+                System.out.println("  do seekFloor(" + inputToString(inputMode, pairs.get(upto).input) + ")");
               }
               isDone = fstEnum.seekFloor(pairs.get(upto).input) == null;
             }

Modified: lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/LuceneTaxonomyWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/LuceneTaxonomyWriter.java?rev=1141593&r1=1141592&r2=1141593&view=diff
==============================================================================
--- lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/LuceneTaxonomyWriter.java (original)
+++ lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/LuceneTaxonomyWriter.java Thu Jun 30 16:05:42 2011
@@ -27,10 +27,8 @@ import org.apache.lucene.index.IndexWrit
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.index.LogByteSizeMergePolicy;
 import org.apache.lucene.index.MultiFields;
-import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.index.TermsEnum.SeekStatus;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.LockObtainFailedException;
@@ -798,6 +796,7 @@ public class LuceneTaxonomyWriter implem
     // per step)
 
     while (otherTaxonomiesLeft>0) {
+      // TODO: use a pq here
       String first=null;
       for (int i=0; i<taxonomies.length; i++) {
         if (currentOthers[i]==null) continue;
@@ -819,7 +818,6 @@ public class LuceneTaxonomyWriter implem
         int newordinal = internalAddCategory(cp, cp.length());
         // TODO (Facet): we already had this term in our hands before, in nextTE...
         // // TODO (Facet): no need to make this term?
-        Term t = new Term(Consts.FULL, first);
         for (int i=0; i<taxonomies.length; i++) {
           if (first.equals(currentOthers[i])) {
             // remember the remapping of this ordinal. Note how
@@ -828,8 +826,6 @@ public class LuceneTaxonomyWriter implem
             // like Lucene's merge works, we hope there are few seeks.
             // TODO (Facet): is there a quicker way? E.g., not specifying the
             // next term by name every time?
-            SeekStatus result = othertes[i].seekCeil(t.bytes(), false);
-            assert result == SeekStatus.FOUND;
             otherdocsEnum[i] = othertes[i].docs(MultiFields.getDeletedDocs(otherreaders[i]), otherdocsEnum[i]);
             otherdocsEnum[i].nextDoc(); // TODO (Facet): check?
             int origordinal = otherdocsEnum[i].docID();
@@ -847,10 +843,6 @@ public class LuceneTaxonomyWriter implem
         // to be added because it already existed in the main taxonomy.
 
         // TODO (Facet): Again, is there a quicker way?
-        Term t = new Term(Consts.FULL, first);
-        // TODO: fix bug in MTE seekExact and use that instead.
-        SeekStatus result = mainte.seekCeil(t.bytes(), false);
-        assert result == SeekStatus.FOUND; // // TODO (Facet): explicit check / throw exception?
         mainde = mainte.docs(MultiFields.getDeletedDocs(mainreader), mainde);
         mainde.nextDoc(); // TODO (Facet): check?
         int newordinal = mainde.docID();
@@ -859,8 +851,6 @@ public class LuceneTaxonomyWriter implem
         for (int i=0; i<taxonomies.length; i++) {
           if (first.equals(currentOthers[i])) {
             // TODO (Facet): again, is there a quicker way?
-            result = othertes[i].seekCeil(t.bytes(), false);
-            assert result == SeekStatus.FOUND; // TODO (Facet): explicit check / throw exception?
             otherdocsEnum[i] = othertes[i].docs(MultiFields.getDeletedDocs(otherreaders[i]), otherdocsEnum[i]);
             otherdocsEnum[i].nextDoc(); // TODO (Facet): check?
             int origordinal = otherdocsEnum[i].docID();



Mime
View raw message