lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r1002246 - in /lucene/dev/trunk: lucene/ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/ lucene/contrib/memory/src/java/org/apache/lucene/index/memory/ lucene/src/java/org/apache/lucene/index/ lucene/src/java/org/...
Date Tue, 28 Sep 2010 16:31:45 GMT
Author: mikemccand
Date: Tue Sep 28 16:31:44 2010
New Revision: 1002246

URL: http://svn.apache.org/viewvc?rev=1002246&view=rev
Log:
LUCENE-2674: improve interaction of MTQ & terms cache

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/MIGRATE.txt
    lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java
    lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Similarity.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1002246&r1=1002245&r2=1002246&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Sep 28 16:31:44 2010
@@ -121,6 +121,9 @@ Changes in backwards compatibility polic
   priority queue size, you can use FuzzyQuery(Term, float, int, int) to specify 
   those explicitly.
   
+* LUCENE-2674: MultiTermQuery.TermCollector.collect now accepts the
+  TermsEnum as well.  (Robert Muir, Mike McCandless)
+
 Changes in Runtime Behavior
 
 * LUCENE-2650: The behavior of FSDirectory.open has changed. On 64-bit
@@ -150,6 +153,11 @@ API Changes
   commit points when they are not needed anymore (instead of waiting for the 
   next commit). (Shai Erera)
 
+* LUCENE-2674: A new idfExplain method was added to Similarity, that
+  accepts an incoming docFreq.  If you subclass Similarity, make sure
+  you also override this method on upgrade.  (Robert Muir, Mike
+  McCandless)
+
 New features
 
 * LUCENE-2604: Added RegexpQuery support to QueryParser. Regular expressions

Modified: lucene/dev/trunk/lucene/MIGRATE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/MIGRATE.txt?rev=1002246&r1=1002245&r2=1002246&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/MIGRATE.txt (original)
+++ lucene/dev/trunk/lucene/MIGRATE.txt Tue Sep 28 16:31:44 2010
@@ -278,3 +278,8 @@ LUCENE-1458, LUCENE-2111: Flexible Index
       // document is deleted...
     }
     
+* LUCENE-2674: A new idfExplain method was added to Similarity, that
+  accepts an incoming docFreq.  If you subclass Similarity, make sure
+  you also override this method on upgrade, otherwise your
+  customizations won't run for certain MultiTermQuerys.
+

Modified: lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java?rev=1002246&r1=1002245&r2=1002246&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java Tue Sep 28 16:31:44 2010
@@ -91,6 +91,10 @@ public class InstantiatedTermsEnum exten
   }
 
   @Override
+  public void cacheCurrentTerm() {
+  }
+
+  @Override
   public BytesRef term() {
     return br;
   }

Modified: lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1002246&r1=1002245&r2=1002246&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Tue Sep 28 16:31:44 2010
@@ -874,6 +874,10 @@ public class MemoryIndex implements Seri
       }
 
       @Override
+      public void cacheCurrentTerm() {
+      }
+
+      @Override
       public long ord() {
         return termUpto;
       }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java?rev=1002246&r1=1002245&r2=1002246&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Tue Sep 28 16:31:44 2010
@@ -130,6 +130,11 @@ public class FilterIndexReader extends I
     }
 
     @Override
+    public void cacheCurrentTerm() throws IOException {
+      in.cacheCurrentTerm();
+    }
+
+    @Override
     public SeekStatus seek(long ord) throws IOException {
       return in.seek(ord);
     }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=1002246&r1=1002245&r2=1002246&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java Tue Sep 28 16:31:44 2010
@@ -91,6 +91,13 @@ public final class MultiTermsEnum extend
   }
 
   @Override
+  public void cacheCurrentTerm() throws IOException {
+    for(int i=0;i<numTop;i++) {
+      top[i].terms.cacheCurrentTerm();
+    }
+  }
+
+  @Override
   public Comparator<BytesRef> getComparator() {
     return termComp;
   }
@@ -213,7 +220,7 @@ public final class MultiTermsEnum extend
     throw new UnsupportedOperationException();
   }
 
-  private final void pullTop() {
+  private void pullTop() {
     // extract all subs from the queue that have the same
     // top term
     assert numTop == 0;
@@ -226,7 +233,7 @@ public final class MultiTermsEnum extend
     current = top[0].current;
   }
 
-  private final void pushTop() throws IOException {
+  private void pushTop() throws IOException {
     // call next() on each top, and put back into queue
     for(int i=0;i<numTop;i++) {
       top[i].current = top[i].terms.next();
@@ -418,7 +425,7 @@ public final class MultiTermsEnum extend
     }
 
     @Override
-    protected final boolean lessThan(TermsEnumWithSlice termsA, TermsEnumWithSlice termsB) {
+    protected boolean lessThan(TermsEnumWithSlice termsA, TermsEnumWithSlice termsB) {
       final int cmp = termComp.compare(termsA.current, termsB.current);
       if (cmp != 0) {
         return cmp < 0;

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java?rev=1002246&r1=1002245&r2=1002246&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java Tue Sep 28 16:31:44 2010
@@ -123,6 +123,10 @@ public abstract class TermsEnum {
    *  instance & reuse it. */
   public abstract Comparator<BytesRef> getComparator() throws IOException;
 
+  /** Optional optimization hint: informs the codec that the
+   *  current term is likely to be re-seek'd-to soon.  */
+  public abstract void cacheCurrentTerm() throws IOException;
+
   /** An empty TermsEnum for quickly returning an empty instance e.g.
    * in {@link org.apache.lucene.search.MultiTermQuery}
    * <p><em>Please note:</em> This enum should be unmodifiable,
@@ -138,6 +142,9 @@ public abstract class TermsEnum {
     public SeekStatus seek(long ord) { return SeekStatus.END; }
     
     @Override
+    public void cacheCurrentTerm() {}
+    
+    @Override
     public BytesRef term() {
       throw new IllegalStateException("this method should never be called");
     }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java?rev=1002246&r1=1002245&r2=1002246&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java Tue Sep 28 16:31:44 2010
@@ -82,6 +82,11 @@ public class PrefixCodedTermsReader exte
     public FieldAndTerm() {
     }
 
+    public FieldAndTerm(String field, BytesRef term) {
+      this.field = field;
+      this.term = new BytesRef(term);
+    }
+
     public FieldAndTerm(FieldAndTerm other) {
       field = other.field;
       term = new BytesRef(other.term);
@@ -297,6 +302,14 @@ public class PrefixCodedTermsReader exte
         return termComp;
       }
 
+      @Override
+      public void cacheCurrentTerm() {
+        TermState stateCopy = (TermState) state.clone();
+        stateCopy.filePointer = in.getFilePointer();
+        termsCache.put(new FieldAndTerm(fieldInfo.name, bytesReader.term),
+                       stateCopy);
+      }
+
       /** Seeks until the first term that's >= the provided
        *  text; returns SeekStatus.FOUND if the exact term
        *  is found, SeekStatus.NOT_FOUND if a different term

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java?rev=1002246&r1=1002245&r2=1002246&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java Tue Sep 28 16:31:44 2010
@@ -316,7 +316,7 @@ public class PreFlexFields extends Field
       }
 
       // Seek "back":
-      getTermsDict().seekEnum(te, protoTerm.createTerm(term));
+      getTermsDict().seekEnum(te, protoTerm.createTerm(term), true);
 
       // Test if the term we seek'd to in fact found a
       // surrogate pair at the same position as the E:
@@ -387,7 +387,7 @@ public class PreFlexFields extends Field
 
           if (seekToNonBMP(seekTermEnum, prevTerm, downTo)) {
             // TODO: more efficient seek?
-            getTermsDict().seekEnum(termEnum, seekTermEnum.term());
+            getTermsDict().seekEnum(termEnum, seekTermEnum.term(), true);
             //newSuffixStart = downTo+4;
             newSuffixStart = downTo;
             scratchTerm.copy(termEnum.term().bytes());
@@ -443,7 +443,7 @@ public class PreFlexFields extends Field
           
         // TODO: more efficient seek?  can we simply swap
         // the enums?
-        getTermsDict().seekEnum(termEnum, protoTerm.createTerm(scratchTerm));
+        getTermsDict().seekEnum(termEnum, protoTerm.createTerm(scratchTerm), true);
 
         final Term t2 = termEnum.term();
 
@@ -619,7 +619,7 @@ public class PreFlexFields extends Field
 
           // Seek "forward":
           // TODO: more efficient seek?
-          getTermsDict().seekEnum(seekTermEnum, protoTerm.createTerm(scratchTerm));
+          getTermsDict().seekEnum(seekTermEnum, protoTerm.createTerm(scratchTerm), true);
 
           scratchTerm.bytes[upTo] = scratch[0];
           scratchTerm.bytes[upTo+1] = scratch[1];
@@ -668,7 +668,7 @@ public class PreFlexFields extends Field
 
             // OK seek "back"
             // TODO: more efficient seek?
-            getTermsDict().seekEnum(termEnum, seekTermEnum.term());
+            getTermsDict().seekEnum(termEnum, seekTermEnum.term(), true);
 
             scratchTerm.copy(seekTermEnum.term().bytes());
 
@@ -701,7 +701,7 @@ public class PreFlexFields extends Field
         seekTermEnum = getTermsDict().terms(protoTerm);
         //System.out.println("  term=" + termEnum.term());
       } else {
-        getTermsDict().seekEnum(termEnum, protoTerm);
+        getTermsDict().seekEnum(termEnum, protoTerm, true);
       }
       skipNext = true;
 
@@ -727,6 +727,11 @@ public class PreFlexFields extends Field
     }
 
     @Override
+    public void cacheCurrentTerm() throws IOException {
+      getTermsDict().cacheCurrentTerm(termEnum);
+    }
+
+    @Override
     public SeekStatus seek(long ord) throws IOException {
       throw new UnsupportedOperationException();
     }
@@ -747,7 +752,7 @@ public class PreFlexFields extends Field
 
       assert termEnum != null;
 
-      tis.seekEnum(termEnum, t0);
+      tis.seekEnum(termEnum, t0, useCache);
 
       final Term t = termEnum.term();
 
@@ -783,7 +788,7 @@ public class PreFlexFields extends Field
             if (seekToNonBMP(seekTermEnum, scratchTerm, i)) {
 
               scratchTerm.copy(seekTermEnum.term().bytes());
-              getTermsDict().seekEnum(termEnum, seekTermEnum.term());
+              getTermsDict().seekEnum(termEnum, seekTermEnum.term(), useCache);
 
               newSuffixStart = 1+i;
 

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java?rev=1002246&r1=1002245&r2=1002246&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java Tue Sep 28 16:31:44 2010
@@ -52,7 +52,7 @@ public final class SegmentTermEnum imple
   private TermBuffer prevBuffer = new TermBuffer();
   private TermBuffer scanBuffer = new TermBuffer(); // used for scanning
 
-  private TermInfo termInfo = new TermInfo();
+  TermInfo termInfo = new TermInfo();
 
   private int format;
   private boolean isIndex = false;
@@ -61,7 +61,6 @@ public final class SegmentTermEnum imple
   int skipInterval;
   int newSuffixStart;
   int maxSkipLevels;
-  private int formatM1SkipInterval;
 
   SegmentTermEnum(IndexInput i, FieldInfos fis, boolean isi)
           throws CorruptIndexException, IOException {

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java?rev=1002246&r1=1002245&r2=1002246&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java Tue Sep 28 16:31:44 2010
@@ -54,8 +54,8 @@ public final class TermInfosReader {
   
   // Just adds term's ord to TermInfo
   private final static class TermInfoAndOrd extends TermInfo {
-    final int termOrd;
-    public TermInfoAndOrd(TermInfo ti, int termOrd) {
+    final long termOrd;
+    public TermInfoAndOrd(TermInfo ti, long termOrd) {
       super(ti);
       this.termOrd = termOrd;
     }
@@ -228,14 +228,24 @@ public final class TermInfosReader {
       return tiOrd;
     }
 
-    return seekEnum(resources.termEnum, term, tiOrd);
+    return seekEnum(resources.termEnum, term, tiOrd, true);
   }
 
-  TermInfo seekEnum(SegmentTermEnum enumerator, Term term) throws IOException {
-    return seekEnum(enumerator, term, termsCache.get(new CloneableTerm(term)));
+  public void cacheCurrentTerm(SegmentTermEnum enumerator) {
+    termsCache.put(new CloneableTerm(enumerator.term()),
+                   new TermInfoAndOrd(enumerator.termInfo,
+                                      enumerator.position));
   }
 
-  TermInfo seekEnum(SegmentTermEnum enumerator, Term term, TermInfoAndOrd tiOrd) throws IOException {
+  TermInfo seekEnum(SegmentTermEnum enumerator, Term term, boolean useCache) throws IOException {
+    if (useCache) {
+      return seekEnum(enumerator, term, termsCache.get(new CloneableTerm(term)), useCache);
+    } else {
+      return seekEnum(enumerator, term, null, useCache);
+    }
+  }
+
+  TermInfo seekEnum(SegmentTermEnum enumerator, Term term, TermInfoAndOrd tiOrd, boolean useCache) throws IOException {
     if (size == 0) {
       return null;
     }
@@ -252,7 +262,7 @@ public final class TermInfosReader {
         final TermInfo ti;
         int numScans = enumerator.scanTo(term);
         if (enumerator.term() != null && term.compareToUTF16(enumerator.term()) == 0) {
-          ti = enumerator.termInfo();
+          ti = enumerator.termInfo;
           if (numScans > 1) {
             // we only  want to put this TermInfo into the cache if
             // scanEnum skipped more than one dictionary entry.
@@ -260,7 +270,9 @@ public final class TermInfosReader {
             // wipe out the cache when they iterate over a large numbers
             // of terms in order
             if (tiOrd == null) {
-              termsCache.put(new CloneableTerm(term), new TermInfoAndOrd(ti, (int) enumerator.position));
+              if (useCache) {
+                termsCache.put(new CloneableTerm(term), new TermInfoAndOrd(ti, enumerator.position));
+              }
             } else {
               assert sameTermInfo(ti, tiOrd, enumerator);
               assert (int) enumerator.position == tiOrd.termOrd;
@@ -277,7 +289,7 @@ public final class TermInfosReader {
     // random-access: must seek
     final int indexPos;
     if (tiOrd != null) {
-      indexPos = tiOrd.termOrd / totalIndexInterval;
+      indexPos = (int) (tiOrd.termOrd / totalIndexInterval);
     } else {
       // Must do binary search:
       indexPos = getIndexOffset(term);
@@ -288,9 +300,11 @@ public final class TermInfosReader {
     final TermInfo ti;
 
     if (enumerator.term() != null && term.compareToUTF16(enumerator.term()) == 0) {
-      ti = enumerator.termInfo();
+      ti = enumerator.termInfo;
       if (tiOrd == null) {
-        termsCache.put(new CloneableTerm(term), new TermInfoAndOrd(ti, (int) enumerator.position));
+        if (useCache) {
+          termsCache.put(new CloneableTerm(term), new TermInfoAndOrd(ti, enumerator.position));
+        }
       } else {
         assert sameTermInfo(ti, tiOrd, enumerator);
         assert (int) enumerator.position == tiOrd.termOrd;

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java?rev=1002246&r1=1002245&r2=1002246&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java Tue Sep 28 16:31:44 2010
@@ -32,6 +32,8 @@ import org.apache.lucene.util.StringHelp
 
 import java.io.IOException;
 import java.util.Comparator;
+import java.util.Map;
+import java.util.HashMap;
 
 class SimpleTextFieldsReader extends FieldsProducer {
 
@@ -154,6 +156,10 @@ class SimpleTextFieldsReader extends Fie
     }
 
     @Override
+    public void cacheCurrentTerm() {
+    }
+
+    @Override
     public BytesRef next() throws IOException {
       assert !ended;
       readLine(in, scratch);
@@ -468,16 +474,23 @@ class SimpleTextFieldsReader extends Fie
     return new SimpleTextFieldsEnum();
   }
 
+  private final Map<String,Terms> termsCache = new HashMap<String,Terms>();
+
   @Override
-  public Terms terms(String field) throws IOException {
-    SimpleTextFieldsEnum fe = (SimpleTextFieldsEnum) iterator();
-    String fieldUpto;
-    while((fieldUpto = fe.next()) != null) {
-      if (fieldUpto.equals(field)) {
-        return new SimpleTextTerms(field, fe.in.getFilePointer());
+  synchronized public Terms terms(String field) throws IOException {
+    Terms terms = termsCache.get(field);
+    if (terms == null) {
+      SimpleTextFieldsEnum fe = (SimpleTextFieldsEnum) iterator();
+      String fieldUpto;
+      while((fieldUpto = fe.next()) != null) {
+        if (fieldUpto.equals(field)) {
+          terms = new SimpleTextTerms(field, fe.in.getFilePointer());
+          break;
+        }
       }
+      termsCache.put(field, terms);
     }
-    return null;
+    return terms;
   }
 
   @Override

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java?rev=1002246&r1=1002245&r2=1002246&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java Tue Sep 28 16:31:44 2010
@@ -46,7 +46,6 @@ public abstract class FilteredTermsEnum 
   private BytesRef initialSeekTerm = null;
   private boolean doSeek = true;        
   private BytesRef actualTerm = null;
-  private boolean useTermsCache = false;
 
   private final TermsEnum tenum;
 
@@ -116,16 +115,6 @@ public abstract class FilteredTermsEnum 
     return t;
   }
 
-  /** Expert: enable or disable the terms cache when seeking. */
-  protected final void setUseTermsCache(boolean useTermsCache) {
-    this.useTermsCache = useTermsCache;
-  }
-
-  /** Expert: enable or disable the terms cache when seeking. */
-  protected final boolean getUseTermsCache() {
-    return useTermsCache;
-  }
-
   /**
    * Returns the related attributes, the returned {@link AttributeSource}
    * is shared with the delegate {@code TermsEnum}.
@@ -188,6 +177,11 @@ public abstract class FilteredTermsEnum 
     assert tenum != null;
     return tenum.docsAndPositions(bits, reuse);
   }
+
+  @Override
+  public void cacheCurrentTerm() throws IOException {
+    tenum.cacheCurrentTerm();
+  }
     
   @Override
   public BytesRef next() throws IOException {
@@ -200,7 +194,7 @@ public abstract class FilteredTermsEnum 
         final BytesRef t = nextSeekTerm(actualTerm);
         // Make sure we always seek forward:
         assert actualTerm == null || t == null || getComparator().compare(t, actualTerm) > 0: "curTerm=" + actualTerm + " seekTerm=" + t;
-        if (t == null || tenum.seek(t, useTermsCache) == SeekStatus.END) {
+        if (t == null || tenum.seek(t, false) == SeekStatus.END) {
           // no more terms to seek to or enum exhausted
           return null;
         }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java?rev=1002246&r1=1002245&r2=1002246&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java Tue Sep 28 16:31:44 2010
@@ -220,6 +220,11 @@ public final class FuzzyTermsEnum extend
   }
   
   @Override
+  public void cacheCurrentTerm() throws IOException {
+    actualEnum.cacheCurrentTerm();
+  }
+
+  @Override
   public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
     return actualEnum.docs(skipDocs, reuse);
   }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java?rev=1002246&r1=1002245&r2=1002246&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java Tue Sep 28 16:31:44 2010
@@ -19,7 +19,6 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 import java.io.Serializable;
-import java.util.ArrayList;
 import java.util.PriorityQueue;
 
 import org.apache.lucene.index.IndexReader;
@@ -201,7 +200,8 @@ public abstract class MultiTermQuery ext
       int count = 0;
       BytesRef bytes;
       while ((bytes = termsEnum.next()) != null) {
-        if (collector.collect(bytes, boostAtt.getBoost())) {
+        if (collector.collect(termsEnum, bytes, boostAtt.getBoost())) {
+          termsEnum.cacheCurrentTerm();
           count++;
         } else {
           break;
@@ -215,7 +215,7 @@ public abstract class MultiTermQuery ext
       private BoostAttribute boostAtt = null;
     
       /** return false to stop collecting */
-      public abstract boolean collect(BytesRef bytes, float boost) throws IOException;
+      public abstract boolean collect(TermsEnum termsEnum, BytesRef bytes, float boost) throws IOException;
       
       /** set the minimum boost as a hint for the term producer */
       protected final void setMaxNonCompetitiveBoost(float maxNonCompetitiveBoost) {
@@ -231,9 +231,10 @@ public abstract class MultiTermQuery ext
       final BooleanQuery result = new BooleanQuery(true);
       final Term placeholderTerm = new Term(query.field);
       query.incTotalNumberOfTerms(collectTerms(reader, query, new TermCollector() {
-        public boolean collect(BytesRef bytes, float boost) {
+        @Override
+        public boolean collect(TermsEnum termsEnum, BytesRef bytes, float boost) {
           // add new TQ, we must clone the term, else it may get overwritten!
-          TermQuery tq = new TermQuery(placeholderTerm.createTerm(new BytesRef(bytes)));
+          TermQuery tq = new TermQuery(placeholderTerm.createTerm(new BytesRef(bytes)), termsEnum.docFreq());
           tq.setBoost(query.getBoost() * boost); // set the boost
           result.add(tq, BooleanClause.Occur.SHOULD); // add to query
           return true;
@@ -291,20 +292,22 @@ public abstract class MultiTermQuery ext
     }
     
     /** Return a suitable Query for a MultiTermQuery term. */
-    protected abstract Query getQuery(Term term);
+    protected abstract Query getQuery(Term term, int docCount);
 
     @Override
     public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
       final int maxSize = Math.min(size, BooleanQuery.getMaxClauseCount());
       final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();
       collectTerms(reader, query, new TermCollector() {
-        public boolean collect(BytesRef bytes, float boost) {
+        @Override
+        public boolean collect(TermsEnum termsEnum, BytesRef bytes, float boost) {
           // ignore uncompetetive hits
           if (stQueue.size() >= maxSize && boost <= stQueue.peek().boost)
             return true;
           // add new entry in PQ, we must clone the term, else it may get overwritten!
           st.bytes.copy(bytes);
           st.boost = boost;
+          st.docFreq = termsEnum.docFreq();
           stQueue.offer(st);
           // possibly drop entries from queue
           st = (stQueue.size() > maxSize) ? stQueue.poll() : new ScoreTerm();
@@ -320,7 +323,7 @@ public abstract class MultiTermQuery ext
       final BooleanQuery bq = new BooleanQuery(true);
       for (final ScoreTerm st : stQueue) {
         // add new query, we must clone the term, else it may get overwritten!
-        Query tq = getQuery(placeholderTerm.createTerm(st.bytes));
+        Query tq = getQuery(placeholderTerm.createTerm(st.bytes), st.docFreq);
         tq.setBoost(query.getBoost() * st.boost); // set the boost
         bq.add(tq, BooleanClause.Occur.SHOULD);   // add to query
       }
@@ -349,6 +352,7 @@ public abstract class MultiTermQuery ext
     private static class ScoreTerm implements Comparable<ScoreTerm> {
       public final BytesRef bytes = new BytesRef();
       public float boost;
+      public int docFreq;
       
       public int compareTo(ScoreTerm other) {
         if (this.boost == other.boost)
@@ -395,8 +399,8 @@ public abstract class MultiTermQuery ext
     }
     
     @Override
-    protected Query getQuery(Term term) {
-      return new TermQuery(term);
+    protected Query getQuery(Term term, int docFreq) {
+      return new TermQuery(term, docFreq);
     }
   }
   
@@ -433,8 +437,8 @@ public abstract class MultiTermQuery ext
     }
     
     @Override
-    protected Query getQuery(Term term) {
-      return new ConstantScoreQuery(new QueryWrapperFilter(new TermQuery(term)));
+    protected Query getQuery(Term term, int docFreq) {
+      return new ConstantScoreQuery(new QueryWrapperFilter(new TermQuery(term, docFreq)));
     }
   }
   
@@ -567,18 +571,14 @@ public abstract class MultiTermQuery ext
         this.termCountLimit = termCountLimit;
       }
     
-      public boolean collect(BytesRef bytes, float boost) throws IOException {
+      public boolean collect(TermsEnum termsEnum, BytesRef bytes, float boost) throws IOException {
         termCount++;
         if (termCount >= termCountLimit || docVisitCount >= docCountCutoff) {
           hasCutOff = true;
           return false;
         }
         pendingTerms.copyUsingLengthPrefix(bytes);
-        // Loading the TermInfo from the terms dict here
-        // should not be costly, because 1) the
-        // query/filter will load the TermInfo when it
-        // runs, and 2) the terms dict has a cache:
-        docVisitCount += reader.docFreq(field, bytes);
+        docVisitCount += termsEnum.docFreq();
         return true;
       }
       

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Similarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Similarity.java?rev=1002246&r1=1002245&r2=1002246&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Similarity.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Similarity.java Tue Sep 28 16:31:44 2010
@@ -741,7 +741,7 @@ public abstract class Similarity impleme
    * The default implementation uses:
    * 
    * <pre>
-   * idf(searcher.docFreq(term), searcher.maxDoc());
+   * idf(docFreq, searcher.maxDoc());
    * </pre>
    * 
    * Note that {@link Searcher#maxDoc()} is used instead of
@@ -752,12 +752,13 @@ public abstract class Similarity impleme
    *   
    * @param term the term in question
    * @param searcher the document collection being searched
+   * @param docFreq externally computed docFreq for this term
    * @return an IDFExplain object that includes both an idf score factor 
              and an explanation for the term.
    * @throws IOException
    */
-  public IDFExplanation idfExplain(final Term term, final Searcher searcher) throws IOException {
-    final int df = searcher.docFreq(term);
+  public IDFExplanation idfExplain(final Term term, final Searcher searcher, int docFreq) throws IOException {
+    final int df = docFreq;
     final int max = searcher.maxDoc();
     final float idf = idf(df, max);
     return new IDFExplanation() {
@@ -773,6 +774,15 @@ public abstract class Similarity impleme
    }
 
   /**
+   * This method forwards to {@link
+   * #idfExplain(Term,Searcher,int)} by passing
+   * <code>searcher.docFreq(term)</code> as the docFreq.
+   */
+  public IDFExplanation idfExplain(final Term term, final Searcher searcher) throws IOException {
+    return idfExplain(term, searcher, searcher.docFreq(term));
+   }
+
+  /**
    * Computes a score factor for a phrase.
    * 
    * <p>

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java?rev=1002246&r1=1002245&r2=1002246&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java Tue Sep 28 16:31:44 2010
@@ -30,7 +30,8 @@ import org.apache.lucene.util.ToStringUt
   This may be combined with other terms with a {@link BooleanQuery}.
   */
 public class TermQuery extends Query {
-  private Term term;
+  private final Term term;
+  private final int docFreq;
 
   private class TermWeight extends Weight {
     private final Similarity similarity;
@@ -43,7 +44,11 @@ public class TermQuery extends Query {
     public TermWeight(Searcher searcher)
       throws IOException {
       this.similarity = getSimilarity(searcher);
-      idfExp = similarity.idfExplain(term, searcher);
+      if (docFreq != -1) {
+        idfExp = similarity.idfExplain(term, searcher, docFreq);
+      } else {
+        idfExp = similarity.idfExplain(term, searcher);
+      }
       idf = idfExp.getIdf();
     }
 
@@ -160,7 +165,15 @@ public class TermQuery extends Query {
 
   /** Constructs a query for the term <code>t</code>. */
   public TermQuery(Term t) {
+    this(t, -1);
+  }
+
+  /** Expert: constructs a TermQuery that will use the
+   *  provided docFreq instead of looking up the docFreq
+   *  against the searcher. */
+  public TermQuery(Term t, int docFreq) {
     term = t;
+    this.docFreq = docFreq;
   }
 
   /** Returns the term of this query. */

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java?rev=1002246&r1=1002245&r2=1002246&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java Tue Sep 28 16:31:44 2010
@@ -285,6 +285,11 @@ public class DocTermsIndexCreator<T exte
       }
 
       @Override
+      public void cacheCurrentTerm() throws IOException {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
       public BytesRef term() throws IOException {
         return term;
       }

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestExternalCodecs.java?rev=1002246&r1=1002245&r2=1002246&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestExternalCodecs.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestExternalCodecs.java Tue Sep 28 16:31:44 2010
@@ -320,6 +320,10 @@ public class TestExternalCodecs extends 
       }
 
       @Override
+      public void cacheCurrentTerm() {
+      }
+
+      @Override
       public DocsEnum docs(Bits skipDocs, DocsEnum reuse) {
         return new RAMDocsEnum(ramField.termToDocs.get(current), skipDocs);
       }

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java?rev=1002246&r1=1002245&r2=1002246&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java Tue Sep 28 16:31:44 2010
@@ -33,6 +33,7 @@ import org.apache.lucene.store.MockDirec
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.BytesRef;
+import static org.junit.Assume.*;
 
 /**
  * Tests lazy skipping on the proximity file.
@@ -117,14 +118,13 @@ public class TestLazyProxSkipping extend
         assertTrue(this.seeksCounter > 0);
         assertTrue("seeksCounter=" + this.seeksCounter + " numHits=" + numHits, this.seeksCounter <= numHits + 1);
     }
-    
+ 
     public void testLazySkipping() throws IOException {
+        assumeTrue(!CodecProvider.getDefaultCodec().equals("SimpleText"));
         // test whether only the minimum amount of seeks()
         // are performed
-        if (!CodecProvider.getDefaultCodec().equals("SimpleText")) {
-          performTest(5);
-          performTest(10);
-        }
+        performTest(5);
+        performTest(10);
     }
     
     public void testSeek() throws IOException {

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java?rev=1002246&r1=1002245&r2=1002246&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java Tue Sep 28 16:31:44 2010
@@ -87,7 +87,6 @@ public class TestNumericRangeQuery64 ext
       ascfield2.setLongValue(val);
       writer.addDocument(doc);
     }
-  
     reader = writer.getReader();
     searcher=new IndexSearcher(reader);
     writer.close();

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java?rev=1002246&r1=1002245&r2=1002246&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java Tue Sep 28 16:31:44 2010
@@ -191,9 +191,7 @@ public abstract class LuceneTestCase ext
   
   private static Map<MockDirectoryWrapper,StackTraceElement[]> stores;
   
-  // TODO 4.0: make sure we re-enable SimpleText in the rotation
-  //private static final String[] TEST_CODECS = new String[] {"MockSep", "MockFixedIntBlock", "MockVariableIntBlock", "SimpleText"};
-  private static final String[] TEST_CODECS = new String[] {"MockSep", "MockFixedIntBlock", "MockVariableIntBlock"};
+  private static final String[] TEST_CODECS = new String[] {"MockSep", "MockFixedIntBlock", "MockVariableIntBlock", "SimpleText"};
 
   private static void swapCodec(Codec c) {
     final CodecProvider cp = CodecProvider.getDefault();
@@ -246,8 +244,7 @@ public abstract class LuceneTestCase ext
     swapCodec(new MockFixedIntBlockCodec(codecHasParam && "MockFixedIntBlock".equals(codec) ? codecParam : _TestUtil.nextInt(random, 1, 2000)));
     // baseBlockSize cannot be over 127:
     swapCodec(new MockVariableIntBlockCodec(codecHasParam && "MockVariableIntBlock".equals(codec) ? codecParam : _TestUtil.nextInt(random, 1, 127)));
-    // TODO 4.0: add this into test rotation
-    //swapCodec(new SimpleTextCodec());
+    swapCodec(new SimpleTextCodec());
 
     return cp.lookup(codec);
   }

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java?rev=1002246&r1=1002245&r2=1002246&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java Tue Sep 28 16:31:44 2010
@@ -40,7 +40,6 @@ import org.apache.solr.search.*;
 import org.apache.solr.util.ByteUtils;
 import org.apache.solr.util.LongPriorityQueue;
 import org.apache.solr.util.PrimUtils;
-import org.apache.solr.util.BoundedTreeSet;
 import org.apache.solr.handler.component.StatsValues;
 import org.apache.solr.handler.component.FieldFacetStats;
 import org.apache.lucene.util.OpenBitSet;
@@ -1002,6 +1001,11 @@ class NumberedTermsEnum extends TermsEnu
     return tenum.docFreq();
   }
 
+  @Override
+  public void cacheCurrentTerm() {
+    throw new UnsupportedOperationException();
+  }
+
   public BytesRef skipTo(BytesRef target) throws IOException {
 
     // already here



Mime
View raw message