lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r963751 - in /lucene/dev/trunk/lucene: CHANGES.txt src/java/org/apache/lucene/index/MultiTermsEnum.java src/test/org/apache/lucene/index/TestMultiFields.java
Date Tue, 13 Jul 2010 15:03:08 GMT
Author: mikemccand
Date: Tue Jul 13 15:03:07 2010
New Revision: 963751

URL: http://svn.apache.org/viewvc?rev=963751&view=rev
Log:
LUCENE-2130: fix performance issue with MultiTermsEnum.seek when you seek just a bit ahead
on each call (which AutomatonFuzzyTermsEnum does)

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMultiFields.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=963751&r1=963750&r2=963751&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Jul 13 15:03:07 2010
@@ -728,6 +728,9 @@ Bug fixes
   files when a mergedSegmentWarmer is set on IndexWriter.  (Mike
   McCandless)
 
+* LUCENE-2130: Fix performance issue when FuzzyQuery runs on a
+  multi-segment index (Michael McCandless)
+
 API Changes
 
 * LUCENE-2281: added doBeforeFlush to IndexWriter to allow extensions to perform

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=963751&r1=963750&r2=963751&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java Tue Jul 13 15:03:07 2010
@@ -42,6 +42,9 @@ public final class MultiTermsEnum extend
   private final MultiDocsEnum.EnumWithSlice[] subDocs;
   private final MultiDocsAndPositionsEnum.EnumWithSlice[] subDocsAndPositions;
 
+  private BytesRef lastSeek;
+  private final BytesRef lastSeekScratch = new BytesRef();
+
   private int numTop;
   private int numSubs;
   private BytesRef current;
@@ -139,8 +142,40 @@ public final class MultiTermsEnum extend
   public SeekStatus seek(BytesRef term, boolean useCache) throws IOException {
     queue.clear();
     numTop = 0;
+
+    boolean seekOpt = false;
+    if (lastSeek != null && termComp.compare(lastSeek, term) <= 0) {
+      seekOpt = true;
+    }
+    lastSeekScratch.copy(term);
+    lastSeek = lastSeekScratch;
+
     for(int i=0;i<numSubs;i++) {
-      final SeekStatus status = currentSubs[i].terms.seek(term, useCache);
+      final SeekStatus status;
+      // LUCENE-2130: if we had just seek'd already, prior
+      // to this seek, and the new seek term is after the
+      // previous one, don't try to re-seek this sub if its
+      // current term is already beyond this new seek term.
+      // Doing so is a waste because this sub will simply
+      // seek to the same spot.
+      if (seekOpt) {
+        final BytesRef curTerm = currentSubs[i].current;
+        if (curTerm != null) {
+          final int cmp = termComp.compare(term, curTerm);
+          if (cmp == 0) {
+            status = SeekStatus.FOUND;
+          } else if (cmp < 0) {
+            status = SeekStatus.NOT_FOUND;
+          } else {
+            status = currentSubs[i].terms.seek(term, useCache);
+          }
+        } else {
+          status = SeekStatus.END;
+        }
+      } else {
+        status = currentSubs[i].terms.seek(term, useCache);
+      }
+
       if (status == SeekStatus.FOUND) {
         top[numTop++] = currentSubs[i];
         current = currentSubs[i].current = currentSubs[i].terms.term();
@@ -150,6 +185,7 @@ public final class MultiTermsEnum extend
         queue.add(currentSubs[i]);
       } else {
         // enum exhausted
+        currentSubs[i].current = null;
       }
     }
 
@@ -205,6 +241,8 @@ public final class MultiTermsEnum extend
 
   @Override
   public BytesRef next() throws IOException {
+    lastSeek = null;
+
     // restore queue
     pushTop();
 

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMultiFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMultiFields.java?rev=963751&r1=963750&r2=963751&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMultiFields.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMultiFields.java Tue Jul 13 15:03:07 2010
@@ -88,14 +88,13 @@ public class TestMultiFields extends Luc
         BytesRef term = terms.get(r.nextInt(terms.size()));
         
         DocsEnum docsEnum = terms2.docs(delDocs, term, null);
-        int count = 0;
+        assertNotNull(docsEnum);
+
         for(int docID : docs.get(term)) {
           if (!deleted.contains(docID)) {
             assertEquals(docID, docsEnum.nextDoc());
-            count++;
           }
         }
-        //System.out.println("c=" + count + " t=" + term);
         assertEquals(docsEnum.NO_MORE_DOCS, docsEnum.nextDoc());
       }
 
@@ -104,6 +103,7 @@ public class TestMultiFields extends Luc
     }
   }
 
+  /*
   private void verify(IndexReader r, String term, List<Integer> expected) throws Exception {
     DocsEnum docs = MultiFields.getTermDocsEnum(r,
                                                 MultiFields.getDeletedDocs(r),
@@ -115,6 +115,7 @@ public class TestMultiFields extends Luc
     }
     assertEquals(docs.NO_MORE_DOCS, docs.nextDoc());
   }
+  */
 
   public void testSeparateEnums() throws Exception {
     Directory dir = new MockRAMDirectory();
@@ -133,5 +134,4 @@ public class TestMultiFields extends Luc
     r.close();
     dir.close();
   }
-    
 }



Mime
View raw message