lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From yo...@apache.org
Subject svn commit: r774846 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/index/MultiSegmentReader.java src/java/org/apache/lucene/index/SegmentMergeInfo.java
Date Thu, 14 May 2009 16:40:23 GMT
Author: yonik
Date: Thu May 14 16:40:23 2009
New Revision: 774846

URL: http://svn.apache.org/viewvc?rev=774846&view=rev
Log:
LUCENE-1596: MultiTermDocs speedup when set with MultiTermDocs.seek(MultiTermEnum)

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/index/MultiSegmentReader.java
    lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMergeInfo.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=774846&r1=774845&r2=774846&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Thu May 14 16:40:23 2009
@@ -345,6 +345,9 @@
     score should then call Scorer.score() per hit inside
     collect(). (Shai Erera via Mike McCandless)
 
+ 8. LUCENE-1596: MultiTermDocs speedup when set with
+    MultiTermDocs.seek(MultiTermEnum) (yonik)
+
 Documentation
 
 Build

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/MultiSegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/MultiSegmentReader.java?rev=774846&r1=774845&r2=774846&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/MultiSegmentReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/MultiSegmentReader.java Thu May 14 16:40:23 2009
@@ -533,10 +533,12 @@
   
     private Term term;
     private int docFreq;
-  
+    final SegmentMergeInfo[] matchingSegments; // null terminated array of matching segments
+
     public MultiTermEnum(IndexReader[] readers, int[] starts, Term t)
       throws IOException {
       queue = new SegmentMergeQueue(readers.length);
+      matchingSegments = new SegmentMergeInfo[readers.length+1];
       for (int i = 0; i < readers.length; i++) {
         IndexReader reader = readers[i];
         TermEnum termEnum;
@@ -547,6 +549,7 @@
           termEnum = reader.terms();
   
         SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader);
+        smi.ord = i;
         if (t == null ? smi.next() : termEnum.term() != null)
           queue.put(smi);          // initialize queue
         else
@@ -559,7 +562,20 @@
     }
   
     public boolean next() throws IOException {
+      for (int i=0; i<matchingSegments.length; i++) {
+        SegmentMergeInfo smi = matchingSegments[i];
+        if (smi==null) break;
+        if (smi.next())
+          queue.put(smi);
+        else
+          smi.close(); // done with segment
+      }
+      
+      int numMatchingSegments = 0;
+      matchingSegments[0] = null;
+
       SegmentMergeInfo top = (SegmentMergeInfo)queue.top();
+
       if (top == null) {
         term = null;
         return false;
@@ -569,14 +585,13 @@
       docFreq = 0;
   
       while (top != null && term.compareTo(top.term) == 0) {
+        matchingSegments[numMatchingSegments++] = top;
         queue.pop();
         docFreq += top.termEnum.docFreq();    // increment freq
-        if (top.next())
-          queue.put(top);          // restore queue
-        else
-          top.close();          // done with a segment
         top = (SegmentMergeInfo)queue.top();
       }
+
+      matchingSegments[numMatchingSegments] = null;
       return true;
     }
   
@@ -603,7 +618,11 @@
   
     private TermDocs[] readerTermDocs;
     protected TermDocs current;              // == readerTermDocs[pointer]
-  
+
+    private MultiTermEnum tenum;  // the term enum used for seeking... can be null
+    int matchingSegmentPos;  // position into the matching segments from tenum
+    SegmentMergeInfo smi;     // current segment merge info... can be null
+
     public MultiTermDocs(IndexReader[] r, int[] s) {
       readers = r;
       starts = s;
@@ -623,10 +642,16 @@
       this.base = 0;
       this.pointer = 0;
       this.current = null;
+      this.tenum = null;
+      this.smi = null;
+      this.matchingSegmentPos = 0;
     }
   
     public void seek(TermEnum termEnum) throws IOException {
       seek(termEnum.term());
+      if (termEnum instanceof MultiTermEnum) {
+        this.tenum = (MultiTermEnum)termEnum;
+      }
     }
   
     public boolean next() throws IOException {
@@ -635,6 +660,14 @@
           return true;
         }
         else if (pointer < readers.length) {
+          if (tenum != null) {
+            smi = tenum.matchingSegments[matchingSegmentPos++];
+            if (smi==null) {
+              pointer = readers.length;
+              return false;
+            }
+            pointer = smi.ord;
+          }
           base = starts[pointer];
           current = termDocs(pointer++);
         } else {
@@ -648,6 +681,14 @@
       while (true) {
         while (current == null) {
           if (pointer < readers.length) {      // try next segment
+            if (tenum != null) {
+              smi = tenum.matchingSegments[matchingSegmentPos++];
+              if (smi==null) {
+                pointer = readers.length;
+                return 0;
+              }
+              pointer = smi.ord;
+            }
             base = starts[pointer];
             current = termDocs(pointer++);
           } else {
@@ -672,6 +713,14 @@
         if (current != null && current.skipTo(target-base)) {
           return true;
         } else if (pointer < readers.length) {
+          if (tenum != null) {
+            SegmentMergeInfo smi = tenum.matchingSegments[matchingSegmentPos++];
+            if (smi==null) {
+              pointer = readers.length;
+              return false;
+            }
+            pointer = smi.ord;
+          }
           base = starts[pointer];
           current = termDocs(pointer++);
         } else
@@ -683,7 +732,13 @@
       TermDocs result = readerTermDocs[i];
       if (result == null)
         result = readerTermDocs[i] = termDocs(readers[i]);
-      result.seek(term);
+      if (smi != null) {
+        assert(smi.ord == i);
+        assert(smi.termEnum.term().equals(term));
+        result.seek(smi.termEnum);
+      } else {
+        result.seek(term);
+      }
       return result;
     }
   

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMergeInfo.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMergeInfo.java?rev=774846&r1=774845&r2=774846&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMergeInfo.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMergeInfo.java Thu May 14 16:40:23 2009
@@ -22,6 +22,7 @@
 final class SegmentMergeInfo {
   Term term;
   int base;
+  int ord;  // the position of the segment in a MultiReader
   TermEnum termEnum;
   IndexReader reader;
   int delCount;



Mime
View raw message