lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From busc...@apache.org
Subject svn commit: r1058718 [5/18] - in /lucene/dev/branches/realtime_search: ./ lucene/ lucene/contrib/ lucene/contrib/ant/src/java/org/apache/lucene/ant/ lucene/contrib/ant/src/test/org/apache/lucene/ant/ lucene/contrib/benchmark/ lucene/contrib/demo/src/ja...
Date Thu, 13 Jan 2011 19:53:39 GMT
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java Thu Jan 13 19:53:21 2011
@@ -32,7 +32,7 @@ public abstract class TermsIndexWriterBa
     public abstract void finish() throws IOException;
   }
 
-  public abstract FieldWriter addField(FieldInfo fieldInfo);
+  public abstract FieldWriter addField(FieldInfo fieldInfo) throws IOException;
 
   public abstract void close() throws IOException;
-}
\ No newline at end of file
+}

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java Thu Jan 13 19:53:21 2011
@@ -33,6 +33,7 @@ import org.apache.lucene.index.FieldsEnu
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.CompoundFileReader;
@@ -742,11 +743,6 @@ public class PreFlexFields extends Field
     }
 
     @Override
-    public void cacheCurrentTerm() throws IOException {
-      getTermsDict().cacheCurrentTerm(termEnum);
-    }
-
-    @Override
     public SeekStatus seek(long ord) throws IOException {
       throw new UnsupportedOperationException();
     }

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java Thu Jan 13 19:53:21 2011
@@ -19,14 +19,15 @@ package org.apache.lucene.index.codecs.p
 
 import java.io.IOException;
 
+import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.codecs.TermState;
+import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.pulsing.PulsingPostingsWriterImpl.Document;
-import org.apache.lucene.index.codecs.pulsing.PulsingPostingsWriterImpl.Position;
+import org.apache.lucene.index.codecs.PrefixCodedTermState;
+import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CodecUtil;
@@ -43,7 +44,7 @@ public class PulsingPostingsReaderImpl e
 
   // Fallback reader for non-pulsed terms:
   final PostingsReaderBase wrappedPostingsReader;
-  int maxPulsingDocFreq;
+  int maxPositions;
 
   public PulsingPostingsReaderImpl(PostingsReaderBase wrappedPostingsReader) throws IOException {
     this.wrappedPostingsReader = wrappedPostingsReader;
@@ -53,126 +54,80 @@ public class PulsingPostingsReaderImpl e
   public void init(IndexInput termsIn) throws IOException {
     CodecUtil.checkHeader(termsIn, PulsingPostingsWriterImpl.CODEC,
       PulsingPostingsWriterImpl.VERSION_START, PulsingPostingsWriterImpl.VERSION_START);
-    maxPulsingDocFreq = termsIn.readVInt();
     wrappedPostingsReader.init(termsIn);
   }
 
-  private static class PulsingTermState extends TermState {
-    private Document docs[];
-    private TermState wrappedTermState;
+  private static class PulsingTermState extends PrefixCodedTermState {
+    private byte[] postings;
+    private int postingsSize;                     // -1 if this term was not inlined
+    private PrefixCodedTermState wrappedTermState;
     private boolean pendingIndexTerm;
 
+    @Override
     public Object clone() {
       PulsingTermState clone;
       clone = (PulsingTermState) super.clone();
-      clone.docs = docs.clone();
-      for(int i=0;i<clone.docs.length;i++) {
-        final Document doc = clone.docs[i];
-        if (doc != null) {
-          clone.docs[i] = (Document) doc.clone();
-        }
+      if (postingsSize != -1) {
+        clone.postings = new byte[postingsSize];
+        System.arraycopy(postings, 0, clone.postings, 0, postingsSize);
+      } else {
+        assert wrappedTermState != null;
+        clone.wrappedTermState = (PrefixCodedTermState) wrappedTermState.clone();
       }
-      clone.wrappedTermState = (TermState) wrappedTermState.clone();
       return clone;
     }
 
-    public void copy(TermState _other) {
-      super.copy(_other);
+    @Override
+    public void copyFrom(TermState _other) {
+      super.copyFrom(_other);
       PulsingTermState other = (PulsingTermState) _other;
-      pendingIndexTerm = other.pendingIndexTerm;
-      wrappedTermState.copy(other.wrappedTermState);
-      for(int i=0;i<docs.length;i++) {
-        if (other.docs[i] != null) {
-          docs[i] = (Document) other.docs[i].clone();
+      postingsSize = other.postingsSize;
+      if (other.postingsSize != -1) {
+        if (postings == null || postings.length < other.postingsSize) {
+          postings = new byte[ArrayUtil.oversize(other.postingsSize, 1)];
         }
+        System.arraycopy(other.postings, 0, postings, 0, other.postingsSize);
+      } else {
+        wrappedTermState.copyFrom(other.wrappedTermState);
+      }
+    }
+
+    @Override
+    public String toString() {
+      if (postingsSize == -1) {
+        return "PulsingTermState: not inlined";
+      } else {
+        return "PulsingTermState: inlined size=" + postingsSize;
       }
     }
   }
 
   @Override
-  public TermState newTermState() throws IOException {
+  public PrefixCodedTermState newTermState() throws IOException {
     PulsingTermState state = new PulsingTermState();
     state.wrappedTermState = wrappedPostingsReader.newTermState();
-    state.docs = new Document[maxPulsingDocFreq];
     return state;
   }
 
   @Override
-  public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState _termState, boolean isIndexTerm) throws IOException {
-
+  public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState _termState, boolean isIndexTerm) throws IOException {
     PulsingTermState termState = (PulsingTermState) _termState;
 
     termState.pendingIndexTerm |= isIndexTerm;
 
-    if (termState.docFreq <= maxPulsingDocFreq) {
+    // TODO: wasteful to use a whole byte for this (only 1 bit is needed)
+    if (termsIn.readByte() == 1) {
 
-      // Inlined into terms dict -- read everything in
-
-      // TODO: maybe only read everything in lazily?  But
-      // then we'd need to store length so we could seek
-      // over it when docs/pos enum was not requested
-
-      // TODO: it'd be better to share this encoding logic
-      // in some inner codec that knows how to write a
-      // single doc / single position, etc.  This way if a
-      // given codec wants to store other interesting
-      // stuff, it could use this pulsing codec to do so
-
-      int docID = 0;
-      for(int i=0;i<termState.docFreq;i++) {
-        Document doc = termState.docs[i];
-        if (doc == null) {
-          doc = termState.docs[i] = new Document();
-        }
-        final int code = termsIn.readVInt();
-        if (fieldInfo.omitTermFreqAndPositions) {
-          docID += code;
-          doc.numPositions = 1;
-        } else {
-          docID += code>>>1;
-          if ((code & 1) != 0) {
-            doc.numPositions = 1;
-          } else {
-            doc.numPositions = termsIn.readVInt();
-          }
-            
-          if (doc.numPositions > doc.positions.length) {
-            doc.reallocPositions(doc.numPositions);
-          }
-
-          int position = 0;
-          int payloadLength = -1;
-
-          for(int j=0;j<doc.numPositions;j++) {
-            final Position pos = doc.positions[j];
-            final int code2 = termsIn.readVInt();
-            if (fieldInfo.storePayloads) {
-              position += code2 >>> 1;
-              if ((code2 & 1) != 0) {
-                payloadLength = termsIn.readVInt();
-              }
-
-              if (payloadLength > 0) {
-                if (pos.payload == null) {
-                  pos.payload = new BytesRef();
-                  pos.payload.bytes = new byte[payloadLength];
-                } else if (payloadLength > pos.payload.bytes.length) {
-                  pos.payload.grow(payloadLength);
-                }
-                pos.payload.length = payloadLength;
-                termsIn.readBytes(pos.payload.bytes, 0, payloadLength);
-              } else if (pos.payload != null) {
-                pos.payload.length = 0;
-              }
-            } else {
-              position += code2;
-            }
-            pos.pos = position;
-          }
-        }
-        doc.docID = docID;
+      // Inlined into terms dict -- just read the byte[] blob in,
+      // but don't decode it now (we only decode when a DocsEnum
+      // or D&PEnum is pulled):
+      termState.postingsSize = termsIn.readVInt();
+      if (termState.postings == null || termState.postings.length < termState.postingsSize) {
+        termState.postings = new byte[ArrayUtil.oversize(termState.postingsSize, 1)];
       }
+      termsIn.readBytes(termState.postings, 0, termState.postingsSize);
     } else {
+      termState.postingsSize = -1;
       termState.wrappedTermState.docFreq = termState.docFreq;
       wrappedPostingsReader.readTerm(termsIn, fieldInfo, termState.wrappedTermState, termState.pendingIndexTerm);
       termState.pendingIndexTerm = false;
@@ -182,16 +137,21 @@ public class PulsingPostingsReaderImpl e
   // TODO: we could actually reuse, by having TL that
   // holds the last wrapped reuse, and vice-versa
   @Override
-  public DocsEnum docs(FieldInfo field, TermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
+  public DocsEnum docs(FieldInfo field, PrefixCodedTermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
     PulsingTermState termState = (PulsingTermState) _termState;
-    if (termState.docFreq <= maxPulsingDocFreq) {
+    if (termState.postingsSize != -1) {
+      PulsingDocsEnum postings;
       if (reuse instanceof PulsingDocsEnum) {
-        return ((PulsingDocsEnum) reuse).reset(skipDocs, termState);
+        postings = (PulsingDocsEnum) reuse;
+        if (!postings.canReuse(field)) {
+          postings = new PulsingDocsEnum(field);
+        }
       } else {
-        PulsingDocsEnum docsEnum = new PulsingDocsEnum();
-        return docsEnum.reset(skipDocs, termState);
+        postings = new PulsingDocsEnum(field);
       }
+      return postings.reset(skipDocs, termState);
     } else {
+      // TODO: not great that we lose reuse of PulsingDocsEnum in this case:
       if (reuse instanceof PulsingDocsEnum) {
         return wrappedPostingsReader.docs(field, termState.wrappedTermState, skipDocs, null);
       } else {
@@ -202,15 +162,26 @@ public class PulsingPostingsReaderImpl e
 
   // TODO: -- not great that we can't always reuse
   @Override
-  public DocsAndPositionsEnum docsAndPositions(FieldInfo field, TermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
-    PulsingTermState termState = (PulsingTermState) _termState;
-    if (termState.docFreq <= maxPulsingDocFreq) {
+  public DocsAndPositionsEnum docsAndPositions(FieldInfo field, PrefixCodedTermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
+    if (field.omitTermFreqAndPositions) {
+      return null;
+    }
+    //System.out.println("D&P: field=" + field.name);
+
+    final PulsingTermState termState = (PulsingTermState) _termState;
+
+    if (termState.postingsSize != -1) {
+      PulsingDocsAndPositionsEnum postings;
       if (reuse instanceof PulsingDocsAndPositionsEnum) {
-        return ((PulsingDocsAndPositionsEnum) reuse).reset(skipDocs, termState);
+        postings = (PulsingDocsAndPositionsEnum) reuse;
+        if (!postings.canReuse(field)) {
+          postings = new PulsingDocsAndPositionsEnum(field);
+        }
       } else {
-        PulsingDocsAndPositionsEnum postingsEnum = new PulsingDocsAndPositionsEnum();
-        return postingsEnum.reset(skipDocs, termState);
+        postings = new PulsingDocsAndPositionsEnum(field);
       }
+
+      return postings.reset(skipDocs, termState);
     } else {
       if (reuse instanceof PulsingDocsAndPositionsEnum) {
         return wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, skipDocs, null);
@@ -220,63 +191,90 @@ public class PulsingPostingsReaderImpl e
     }
   }
 
-  static class PulsingDocsEnum extends DocsEnum {
-    private int nextRead;
+  private static class PulsingDocsEnum extends DocsEnum {
+    private final ByteArrayDataInput postings = new ByteArrayDataInput(null);
+    private final boolean omitTF;
+    private final boolean storePayloads;
     private Bits skipDocs;
-    private Document doc;
-    private PulsingTermState state;
+    private int docID;
+    private int freq;
 
-    public void close() {}
-
-    PulsingDocsEnum reset(Bits skipDocs, PulsingTermState termState) {
-      // TODO: -- not great we have to clone here --
-      // merging is wasteful; TermRangeQuery too
-      state = (PulsingTermState) termState.clone();
+    public PulsingDocsEnum(FieldInfo fieldInfo) {
+      omitTF = fieldInfo.omitTermFreqAndPositions;
+      storePayloads = fieldInfo.storePayloads;
+    }
+
+    public PulsingDocsEnum reset(Bits skipDocs, PulsingTermState termState) {
+      //System.out.println("PR docsEnum termState=" + termState + " docFreq=" + termState.docFreq);
+      assert termState.postingsSize != -1;
+      final byte[] bytes = new byte[termState.postingsSize];
+      System.arraycopy(termState.postings, 0, bytes, 0, termState.postingsSize);
+      postings.reset(bytes);
+      docID = 0;
+      freq = 1;
       this.skipDocs = skipDocs;
-      nextRead = 0;
       return this;
     }
 
+    boolean canReuse(FieldInfo fieldInfo) {
+      return omitTF == fieldInfo.omitTermFreqAndPositions && storePayloads == fieldInfo.storePayloads;
+    }
+
     @Override
-    public int nextDoc() {
+    public int nextDoc() throws IOException {
+      //System.out.println("PR nextDoc this= "+ this);
       while(true) {
-        if (nextRead >= state.docFreq) {
+        if (postings.eof()) {
+          //System.out.println("PR   END");
           return NO_MORE_DOCS;
+        }
+
+        final int code = postings.readVInt();
+        if (omitTF) {
+          docID += code;
         } else {
-          doc = state.docs[nextRead++];
-          if (skipDocs == null || !skipDocs.get(doc.docID)) {
-            return doc.docID;
+          docID += code >>> 1;              // shift off low bit
+          if ((code & 1) != 0) {          // if low bit is set
+            freq = 1;                     // freq is one
+          } else {
+            freq = postings.readVInt();     // else read freq
+          }
+
+          // Skip positions
+          if (storePayloads) {
+            int payloadLength = -1;
+            for(int pos=0;pos<freq;pos++) {
+              final int posCode = postings.readVInt();
+              if ((posCode & 1) != 0) {
+                payloadLength = postings.readVInt();
+              }
+              if (payloadLength != 0) {
+                postings.skipBytes(payloadLength);
+              }
+            }
+          } else {
+            for(int pos=0;pos<freq;pos++) {
+              // TODO: skipVInt
+              postings.readVInt();
+            }
           }
         }
-      }
-    }
 
-    @Override
-    public int read() {
-      int i=0;
-      // TODO: -- ob1?
-      initBulkResult();
-      final int[] docs = bulkResult.docs.ints;
-      final int[] freqs = bulkResult.freqs.ints;
-      while(nextRead < state.docFreq) {
-        doc = state.docs[nextRead++];
-        if (skipDocs == null || !skipDocs.get(doc.docID)) {
-          docs[i] = doc.docID;
-          freqs[i] = doc.numPositions;
-          i++;
+        if (skipDocs == null || !skipDocs.get(docID)) {
+          //System.out.println("  return docID=" + docID + " freq=" + freq);
+          return docID;
         }
       }
-      return i;
     }
 
     @Override
     public int freq() {
-      return doc.numPositions;
+      return freq;
     }
 
     @Override
     public int docID() {
-      return doc.docID;
+      return docID;
     }
 
     @Override
@@ -290,53 +288,79 @@ public class PulsingPostingsReaderImpl e
     }
   }
 
-  static class PulsingDocsAndPositionsEnum extends DocsAndPositionsEnum {
-    private int nextRead;
-    private int nextPosRead;
+  private static class PulsingDocsAndPositionsEnum extends DocsAndPositionsEnum {
+    private final ByteArrayDataInput postings = new ByteArrayDataInput(null);
+    private final boolean storePayloads;
+
     private Bits skipDocs;
-    private Document doc;
-    private Position pos;
-    private PulsingTermState state;
+    private int docID;
+    private int freq;
+    private int posPending;
+    private int position;
+    private int payloadLength;
+    private BytesRef payload;
 
-    // Only here to emulate limitation of standard codec,
-    // which only allows retrieving payload more than once
     private boolean payloadRetrieved;
 
-    public void close() {}
+    public PulsingDocsAndPositionsEnum(FieldInfo fieldInfo) {
+      storePayloads = fieldInfo.storePayloads;
+    }
+
+    boolean canReuse(FieldInfo fieldInfo) {
+      return storePayloads == fieldInfo.storePayloads;
+    }
 
-    PulsingDocsAndPositionsEnum reset(Bits skipDocs, PulsingTermState termState) {
-      // TODO: -- not great we have to clone here --
-      // merging is wasteful; TermRangeQuery too
-      state = (PulsingTermState) termState.clone();
+    public PulsingDocsAndPositionsEnum reset(Bits skipDocs, PulsingTermState termState) {
+      assert termState.postingsSize != -1;
+      final byte[] bytes = new byte[termState.postingsSize];
+      System.arraycopy(termState.postings, 0, bytes, 0, termState.postingsSize);
+      postings.reset(bytes);
       this.skipDocs = skipDocs;
-      nextRead = 0;
-      nextPosRead = 0;
+      payloadLength = 0;
+      docID = 0;
+      //System.out.println("PR d&p reset storesPayloads=" + storePayloads + " bytes=" + bytes.length + " this=" + this);
       return this;
     }
 
     @Override
-    public int nextDoc() {
+    public int nextDoc() throws IOException {
+      //System.out.println("PR d&p nextDoc this=" + this);
+
       while(true) {
-        if (nextRead >= state.docFreq) {
+        //System.out.println("  cycle skip posPending=" + posPending);
+
+        skipPositions();
+
+        if (postings.eof()) {
+          //System.out.println("PR   END");
           return NO_MORE_DOCS;
+        }
+
+        final int code = postings.readVInt();
+        docID += code >>> 1;            // shift off low bit
+        if ((code & 1) != 0) {          // if low bit is set
+          freq = 1;                     // freq is one
         } else {
-          doc = state.docs[nextRead++];
-          if (skipDocs == null || !skipDocs.get(doc.docID)) {
-            nextPosRead = 0;
-            return doc.docID;
-          }
+          freq = postings.readVInt();     // else read freq
+        }
+        posPending = freq;
+
+        if (skipDocs == null || !skipDocs.get(docID)) {
+          //System.out.println("  return docID=" + docID + " freq=" + freq);
+          position = 0;
+          return docID;
         }
       }
     }
 
     @Override
     public int freq() {
-      return doc.numPositions;
+      return freq;
     }
 
     @Override
     public int docID() {
-      return doc.docID;
+      return docID;
     }
 
     @Override
@@ -351,22 +375,68 @@ public class PulsingPostingsReaderImpl e
     }
 
     @Override
-    public int nextPosition() {
-      assert nextPosRead < doc.numPositions;
-      pos = doc.positions[nextPosRead++];
-      payloadRetrieved = false;
-      return pos.pos;
+    public int nextPosition() throws IOException {
+      //System.out.println("PR d&p nextPosition posPending=" + posPending + " vs freq=" + freq);
+      
+      assert posPending > 0;
+      posPending--;
+
+      if (storePayloads) {
+        if (!payloadRetrieved) {
+          //System.out.println("PR     skip payload=" + payloadLength);
+          postings.skipBytes(payloadLength);
+        }
+        final int code = postings.readVInt();
+        //System.out.println("PR     code=" + code);
+        if ((code & 1) != 0) {
+          payloadLength = postings.readVInt();
+          //System.out.println("PR     new payload len=" + payloadLength);
+        }
+        position += code >> 1;
+        payloadRetrieved = false;
+      } else {
+        position += postings.readVInt();
+      }
+
+      //System.out.println("PR d&p nextPos return pos=" + position + " this=" + this);
+      return position;
+    }
+
+    private void skipPositions() throws IOException {
+      while(posPending != 0) {
+        nextPosition();
+      }
+      if (storePayloads && !payloadRetrieved) {
+        //System.out.println("  skip payload len=" + payloadLength);
+        postings.skipBytes(payloadLength);
+        payloadRetrieved = true;
+      }
     }
 
     @Override
     public boolean hasPayload() {
-      return !payloadRetrieved && pos.payload != null && pos.payload.length > 0;
+      return storePayloads && !payloadRetrieved && payloadLength > 0;
     }
 
     @Override
-    public BytesRef getPayload() {
+    public BytesRef getPayload() throws IOException {
+      //System.out.println("PR  getPayload payloadLength=" + payloadLength + " this=" + this);
+      if (payloadRetrieved) {
+        throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
+      }
       payloadRetrieved = true;
-      return pos.payload;
+      if (payloadLength > 0) {
+        if (payload == null) {
+          payload = new BytesRef(payloadLength);
+        } else {
+          payload.grow(payloadLength);
+        }
+        postings.readBytes(payload.bytes, 0, payloadLength);
+        payload.length = payloadLength;
+        return payload;
+      } else {
+        return null;
+      }
     }
   }
 

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java Thu Jan 13 19:53:21 2011
@@ -20,12 +20,11 @@ package org.apache.lucene.index.codecs.p
 import java.io.IOException;
 
 import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.util.CodecUtil;
 import org.apache.lucene.index.codecs.PostingsWriterBase;
 import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.CodecUtil;
 
 // TODO: we now pulse entirely according to docFreq of the
 // term; it might be better to eg pulse by "net bytes used"
@@ -44,67 +43,21 @@ public final class PulsingPostingsWriter
 
   final static int VERSION_CURRENT = VERSION_START;
 
-  IndexOutput termsOut;
-
-  boolean omitTF;
-  boolean storePayloads;
-
-  // Starts a new term
-  FieldInfo fieldInfo;
-
-  /** @lucene.experimental */
-  public static class Document {
-    int docID;
-    int termDocFreq;
-    int numPositions;
-    Position[] positions;
-    Document() {
-      positions = new Position[1];
-      positions[0] = new Position();
-    }
-    
-    @Override
-    public Object clone() {
-      Document doc = new Document();
-      doc.docID = docID;
-      doc.termDocFreq = termDocFreq;
-      doc.numPositions = numPositions;
-      doc.positions = new Position[positions.length];
-      for(int i = 0; i < positions.length; i++) {
-        doc.positions[i] = (Position) positions[i].clone();
-      }
-
-      return doc;
-    }
+  private IndexOutput termsOut;
 
-    void reallocPositions(int minSize) {
-      final Position[] newArray = new Position[ArrayUtil.oversize(minSize, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
-      System.arraycopy(positions, 0, newArray, 0, positions.length);
-      for(int i=positions.length;i<newArray.length;i++) {
-        newArray[i] = new Position();
-      }
-      positions = newArray;
-    }
-  }
+  private boolean omitTF;
+  private boolean storePayloads;
 
-  final Document[] pendingDocs;
-  int pendingDocCount = 0;
-  Document currentDoc;
-  boolean pulsed;                                 // false if we've seen > maxPulsingDocFreq docs
+  // one entry per position
+  private final Position[] pending;
+  private int pendingCount = 0;                           // -1 once we've hit too many positions
+  private Position currentDoc;                    // first Position entry of current doc
 
-  static class Position {
+  private static final class Position {
     BytesRef payload;
+    int termFreq;                                 // only set on the first Position entry of a given doc
     int pos;
-    
-    @Override
-    public Object clone() {
-      Position position = new Position();
-      position.pos = pos;
-      if (payload != null) {
-        position.payload = new BytesRef(payload);
-      }
-      return position;
-    }
+    int docID;
   }
 
   // TODO: -- lazy init this?  ie, if every single term
@@ -112,18 +65,19 @@ public final class PulsingPostingsWriter
   // Fallback writer for non-pulsed terms:
   final PostingsWriterBase wrappedPostingsWriter;
 
-  /** If docFreq <= maxPulsingDocFreq, its postings are
+  /** If the total number of positions (summed across all docs
+   *  for this term) is <= maxPositions, then the postings are
    *  inlined into terms dict */
-  public PulsingPostingsWriterImpl(int maxPulsingDocFreq, PostingsWriterBase wrappedPostingsWriter) throws IOException {
+  public PulsingPostingsWriterImpl(int maxPositions, PostingsWriterBase wrappedPostingsWriter) throws IOException {
     super();
 
-    pendingDocs = new Document[maxPulsingDocFreq];
-    for(int i=0;i<maxPulsingDocFreq;i++) {
-      pendingDocs[i] = new Document();
+    pending = new Position[maxPositions];
+    for(int i=0;i<maxPositions;i++) {
+      pending[i] = new Position();
     }
 
     // We simply wrap another postings writer, but only call
-    // on it when doc freq is higher than our cutoff
+    // on it when the total number of positions exceeds the cutoff:
     this.wrappedPostingsWriter = wrappedPostingsWriter;
   }
 
@@ -131,14 +85,13 @@ public final class PulsingPostingsWriter
   public void start(IndexOutput termsOut) throws IOException {
     this.termsOut = termsOut;
     CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
-    termsOut.writeVInt(pendingDocs.length);
     wrappedPostingsWriter.start(termsOut);
   }
 
   @Override
   public void startTerm() {
-    assert pendingDocCount == 0;
-    pulsed = false;
+    //System.out.println("PW   startTerm");
+    assert pendingCount == 0;
   }
 
   // TODO: -- should we NOT reuse across fields?  would
@@ -148,73 +101,56 @@ public final class PulsingPostingsWriter
   // our parent calls setField whenever the field changes
   @Override
   public void setField(FieldInfo fieldInfo) {
-    this.fieldInfo = fieldInfo;
     omitTF = fieldInfo.omitTermFreqAndPositions;
+    //System.out.println("PW field=" + fieldInfo.name + " omitTF=" + omitTF);
     storePayloads = fieldInfo.storePayloads;
     wrappedPostingsWriter.setField(fieldInfo);
   }
 
   @Override
   public void startDoc(int docID, int termDocFreq) throws IOException {
-
     assert docID >= 0: "got docID=" + docID;
-        
-    if (!pulsed && pendingDocCount == pendingDocs.length) {
-      
-      // OK we just crossed the threshold, this term should
-      // now be written with our wrapped codec:
-      wrappedPostingsWriter.startTerm();
-      
-      // Flush all buffered docs
-      for(int i=0;i<pendingDocCount;i++) {
-        final Document doc = pendingDocs[i];
-
-        wrappedPostingsWriter.startDoc(doc.docID, doc.termDocFreq);
-
-        if (!omitTF) {
-          assert doc.termDocFreq == doc.numPositions;
-          for(int j=0;j<doc.termDocFreq;j++) {
-            final Position pos = doc.positions[j];
-            if (pos.payload != null && pos.payload.length > 0) {
-              assert storePayloads;
-              wrappedPostingsWriter.addPosition(pos.pos, pos.payload);
-            } else {
-              wrappedPostingsWriter.addPosition(pos.pos, null);
-            }
-          }
-          wrappedPostingsWriter.finishDoc();
-        }
-      }
+    //System.out.println("PW     doc=" + docID);
 
-      pendingDocCount = 0;
-
-      pulsed = true;
+    if (pendingCount == pending.length) {
+      push();
+      //System.out.println("PW: wrapped.finishDoc");
+      wrappedPostingsWriter.finishDoc();
     }
 
-    if (pulsed) {
+    if (pendingCount != -1) {
+      assert pendingCount < pending.length;
+      currentDoc = pending[pendingCount];
+      currentDoc.docID = docID;
+      if (omitTF) {
+        pendingCount++;
+      } else {
+        currentDoc.termFreq = termDocFreq;
+      }
+    } else {
       // We've already seen too many docs for this term --
       // just forward to our fallback writer
       wrappedPostingsWriter.startDoc(docID, termDocFreq);
-    } else {
-      currentDoc = pendingDocs[pendingDocCount++];
-      currentDoc.docID = docID;
-      // TODO: -- need not store in doc?  only used for alloc & assert
-      currentDoc.termDocFreq = termDocFreq;
-      if (termDocFreq > currentDoc.positions.length) {
-        currentDoc.reallocPositions(termDocFreq);
-      }
-      currentDoc.numPositions = 0;
     }
   }
 
   @Override
   public void addPosition(int position, BytesRef payload) throws IOException {
-    if (pulsed) {
+
+    //System.out.println("PW       pos=" + position + " payload=" + (payload == null ? "null" : payload.length + " bytes"));
+    if (pendingCount == pending.length) {
+      push();
+    }
+
+    if (pendingCount == -1) {
+      // We've already seen too many positions for this term --
+      // just forward to our fallback writer
       wrappedPostingsWriter.addPosition(position, payload);
     } else {
-      // just buffer up
-      Position pos = currentDoc.positions[currentDoc.numPositions++];
+      // buffer up
+      final Position pos = pending[pendingCount++];
       pos.pos = position;
+      pos.docID = currentDoc.docID;
       if (payload != null && payload.length > 0) {
         if (pos.payload == null) {
           pos.payload = new BytesRef(payload);
@@ -229,86 +165,141 @@ public final class PulsingPostingsWriter
 
   @Override
   public void finishDoc() throws IOException {
-    assert omitTF || currentDoc.numPositions == currentDoc.termDocFreq;
-    if (pulsed) {
+    //System.out.println("PW     finishDoc");
+    if (pendingCount == -1) {
       wrappedPostingsWriter.finishDoc();
     }
   }
 
-  boolean pendingIsIndexTerm;
+  private boolean pendingIsIndexTerm;
 
-  int pulsedCount;
-  int nonPulsedCount;
+  private final RAMOutputStream buffer = new RAMOutputStream();
 
   /** Called when we are done adding docs to this term */
   @Override
   public void finishTerm(int docCount, boolean isIndexTerm) throws IOException {
+    //System.out.println("PW   finishTerm docCount=" + docCount);
 
-    assert docCount > 0;
+    assert pendingCount > 0 || pendingCount == -1;
 
     pendingIsIndexTerm |= isIndexTerm;
 
-    if (pulsed) {
+    if (pendingCount == -1) {
+      termsOut.writeByte((byte) 0);
       wrappedPostingsWriter.finishTerm(docCount, pendingIsIndexTerm);
       pendingIsIndexTerm = false;
-      pulsedCount++;
     } else {
-      nonPulsedCount++;
-      // OK, there were few enough occurrences for this
+
+      // There were few enough total occurrences for this
       // term, so we fully inline our postings data into
       // terms dict, now:
-      int lastDocID = 0;
-      for(int i=0;i<pendingDocCount;i++) {
-        final Document doc = pendingDocs[i];
-        final int delta = doc.docID - lastDocID;
-        lastDocID = doc.docID;
-        if (omitTF) {
-          termsOut.writeVInt(delta);
-        } else {
-          assert doc.numPositions == doc.termDocFreq;
-          if (doc.numPositions == 1)
-            termsOut.writeVInt((delta<<1)|1);
-          else {
-            termsOut.writeVInt(delta<<1);
-            termsOut.writeVInt(doc.numPositions);
+
+      termsOut.writeByte((byte) 1);
+
+      // TODO: it'd be better to share this encoding logic
+      // in some inner codec that knows how to write a
+      // single doc / single position, etc.  This way if a
+      // given codec wants to store other interesting
+      // stuff, it could use this pulsing codec to do so
+
+      if (!omitTF) {
+        int lastDocID = 0;
+        int pendingIDX = 0;
+        while(pendingIDX < pendingCount) {
+          final Position doc = pending[pendingIDX];
+
+          final int delta = doc.docID - lastDocID;
+          lastDocID = doc.docID;
+
+          //System.out.println("  write doc=" + doc.docID + " freq=" + doc.termFreq);
+
+          if (doc.termFreq == 1) {
+            buffer.writeVInt((delta<<1)|1);
+          } else {
+            buffer.writeVInt(delta<<1);
+            buffer.writeVInt(doc.termFreq);
           }
 
-          // TODO: we could do better in encoding
-          // payloadLength, eg, if it's always the same
-          // across all terms
-          int lastPosition = 0;
+          int lastPos = 0;
           int lastPayloadLength = -1;
-
-          for(int j=0;j<doc.numPositions;j++) {
-            final Position pos = doc.positions[j];
-            final int delta2 = pos.pos - lastPosition;
-            lastPosition = pos.pos;
+          for(int posIDX=0;posIDX<doc.termFreq;posIDX++) {
+            final Position pos = pending[pendingIDX++];
+            assert pos.docID == doc.docID;
+            final int posDelta = pos.pos - lastPos;
+            lastPos = pos.pos;
+            //System.out.println("    write pos=" + pos.pos);
             if (storePayloads) {
               final int payloadLength = pos.payload == null ? 0 : pos.payload.length;
               if (payloadLength != lastPayloadLength) {
-                termsOut.writeVInt((delta2 << 1)|1);
-                termsOut.writeVInt(payloadLength);
+                buffer.writeVInt((posDelta << 1)|1);
+                buffer.writeVInt(payloadLength);
                 lastPayloadLength = payloadLength;
               } else {
-                termsOut.writeVInt(delta2 << 1);
+                buffer.writeVInt(posDelta << 1);
               }
-
               if (payloadLength > 0) {
-                termsOut.writeBytes(pos.payload.bytes, 0, pos.payload.length);
+                buffer.writeBytes(pos.payload.bytes, 0, pos.payload.length);
               }
             } else {
-              termsOut.writeVInt(delta2);
+              buffer.writeVInt(posDelta);
             }
           }
         }
+      } else {
+        int lastDocID = 0;
+        for(int posIDX=0;posIDX<pendingCount;posIDX++) {
+          final Position doc = pending[posIDX];
+          buffer.writeVInt(doc.docID - lastDocID);
+          lastDocID = doc.docID;
+        }
       }
+      
+      //System.out.println("  bytes=" + buffer.getFilePointer());
+      termsOut.writeVInt((int) buffer.getFilePointer());
+      buffer.writeTo(termsOut);
+      buffer.reset();
     }
 
-    pendingDocCount = 0;
+    pendingCount = 0;
   }
 
   @Override
   public void close() throws IOException {
     wrappedPostingsWriter.close();
   }
+
+  // Pushes pending positions to the wrapped codec
+  private void push() throws IOException {
+    //System.out.println("PW now push @ " + pendingCount + " wrapped=" + wrappedPostingsWriter);
+    assert pendingCount == pending.length;
+      
+    wrappedPostingsWriter.startTerm();
+      
+    // Flush all buffered docs
+    if (!omitTF) {
+      Position doc = null;
+      for(Position pos : pending) {
+        if (doc == null) {
+          doc = pos;
+          //System.out.println("PW: wrapped.startDoc docID=" + doc.docID + " tf=" + doc.termFreq);
+          wrappedPostingsWriter.startDoc(doc.docID, doc.termFreq);
+        } else if (doc.docID != pos.docID) {
+          assert pos.docID > doc.docID;
+          //System.out.println("PW: wrapped.finishDoc");
+          wrappedPostingsWriter.finishDoc();
+          doc = pos;
+          //System.out.println("PW: wrapped.startDoc docID=" + doc.docID + " tf=" + doc.termFreq);
+          wrappedPostingsWriter.startDoc(doc.docID, doc.termFreq);
+        }
+        //System.out.println("PW:   wrapped.addPos pos=" + pos.pos);
+        wrappedPostingsWriter.addPosition(pos.pos, pos.payload);
+      }
+      //wrappedPostingsWriter.finishDoc();
+    } else {
+      for(Position doc : pending) {
+        wrappedPostingsWriter.startDoc(doc.docID, 0);
+      }
+    }
+    pendingCount = -1;
+  }
 }

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java Thu Jan 13 19:53:21 2011
@@ -25,8 +25,9 @@ import org.apache.lucene.index.DocsAndPo
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.TermState;
+import org.apache.lucene.index.codecs.PrefixCodedTermState;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Bits;
@@ -129,20 +130,21 @@ public class SepPostingsReaderImpl exten
     }
   }
 
-  private static class SepTermState extends TermState {
+  private static final class SepTermState extends PrefixCodedTermState {
     // We store only the seek point to the docs file because
     // the rest of the info (freqIndex, posIndex, etc.) is
     // stored in the docs file:
     IntIndexInput.Index docIndex;
-
+    
+    @Override
     public Object clone() {
       SepTermState other = (SepTermState) super.clone();
       other.docIndex = (IntIndexInput.Index) docIndex.clone();
       return other;
     }
 
-    public void copy(TermState _other) {
-      super.copy(_other);
+    public void copyFrom(TermState _other) {
+      super.copyFrom(_other);
       SepTermState other = (SepTermState) _other;
       docIndex.set(other.docIndex);
     }
@@ -154,19 +156,19 @@ public class SepPostingsReaderImpl exten
   }
 
   @Override
-  public TermState newTermState() throws IOException {
+  public PrefixCodedTermState newTermState() throws IOException {
     final SepTermState state =  new SepTermState();
     state.docIndex = docIn.index();
     return state;
   }
 
   @Override
-  public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState termState, boolean isIndexTerm) throws IOException {
+  public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState termState, boolean isIndexTerm) throws IOException {
     ((SepTermState) termState).docIndex.read(termsIn, isIndexTerm);
   }
 
   @Override
-  public DocsEnum docs(FieldInfo fieldInfo, TermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
+  public DocsEnum docs(FieldInfo fieldInfo, PrefixCodedTermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
     final SepTermState termState = (SepTermState) _termState;
     SepDocsEnum docsEnum;
     if (reuse == null || !(reuse instanceof SepDocsEnum)) {
@@ -185,7 +187,7 @@ public class SepPostingsReaderImpl exten
   }
 
   @Override
-  public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
+  public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, PrefixCodedTermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
     assert !fieldInfo.omitTermFreqAndPositions;
     final SepTermState termState = (SepTermState) _termState;
     SepDocsAndPositionsEnum postingsEnum;
@@ -594,6 +596,7 @@ public class SepPostingsReaderImpl exten
       }
 
       final int code = posReader.next();
+      assert code >= 0;
       if (storePayloads) {
         if ((code & 1) != 0) {
           // Payload length has changed

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java Thu Jan 13 19:53:21 2011
@@ -85,24 +85,20 @@ public final class SepPostingsWriterImpl
     super();
 
     final String docFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, DOC_EXTENSION);
-    state.flushedFiles.add(docFileName);
     docOut = factory.createOutput(state.directory, docFileName);
     docIndex = docOut.index();
 
     if (state.fieldInfos.hasProx()) {
       final String frqFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, FREQ_EXTENSION);
-      state.flushedFiles.add(frqFileName);
       freqOut = factory.createOutput(state.directory, frqFileName);
       freqIndex = freqOut.index();
 
       final String posFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, POS_EXTENSION);
       posOut = factory.createOutput(state.directory, posFileName);
-      state.flushedFiles.add(posFileName);
       posIndex = posOut.index();
 
       // TODO: -- only if at least one field stores payloads?
       final String payloadFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, PAYLOAD_EXTENSION);
-      state.flushedFiles.add(payloadFileName);
       payloadOut = state.directory.createOutput(payloadFileName);
 
     } else {
@@ -114,7 +110,6 @@ public final class SepPostingsWriterImpl
     }
 
     final String skipFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, SKIP_EXTENSION);
-    state.flushedFiles.add(skipFileName);
     skipOut = state.directory.createOutput(skipFileName);
 
     totalNumDocs = state.numDocs;
@@ -211,6 +206,7 @@ public final class SepPostingsWriterImpl
     assert !omitTF;
 
     final int delta = position - lastPosition;
+    assert delta > 0 || position == 0: "position=" + position + " lastPosition=" + lastPosition;            // not quite right (if pos=0 is repeated twice we don't catch it)
     lastPosition = position;
 
     if (storePayloads) {

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java Thu Jan 13 19:53:21 2011
@@ -21,6 +21,7 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.index.codecs.FieldsProducer;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.FieldsEnum;
+import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.DocsAndPositionsEnum;
@@ -130,9 +131,8 @@ class SimpleTextFieldsReader extends Fie
 
     public SeekStatus seek(BytesRef text, boolean useCache /* ignored */) throws IOException {
 
-      fstEnum.reset();
       //System.out.println("seek to text=" + text.utf8ToString());
-      final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,Long>> result = fstEnum.advance(text);
+      final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,Long>> result = fstEnum.seekCeil(text);
       if (result == null) {
         //System.out.println("  end");
         return SeekStatus.END;
@@ -153,10 +153,6 @@ class SimpleTextFieldsReader extends Fie
     }
 
     @Override
-    public void cacheCurrentTerm() {
-    }
-
-    @Override
     public BytesRef next() throws IOException {
       assert !ended;
       final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,Long>> result = fstEnum.next();
@@ -215,7 +211,7 @@ class SimpleTextFieldsReader extends Fie
       } 
       return docsAndPositionsEnum.reset(docsStart, skipDocs);
     }
-
+    
     @Override
     public Comparator<BytesRef> getComparator() {
       return BytesRef.getUTF8SortedAsUnicodeComparator();
@@ -440,7 +436,6 @@ class SimpleTextFieldsReader extends Fie
   }
 
   private class SimpleTextTerms extends Terms {
-    private final String field;
     private final long termsStart;
     private final boolean omitTF;
     private FST<PairOutputs.Pair<Long,Long>> fst;
@@ -448,7 +443,6 @@ class SimpleTextFieldsReader extends Fie
     private final BytesRef scratch = new BytesRef(10);
 
     public SimpleTextTerms(String field, long termsStart) throws IOException {
-      this.field = StringHelper.intern(field);
       this.termsStart = termsStart;
       omitTF = fieldInfos.fieldInfo(field).omitTermFreqAndPositions;
       loadTerms();

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java Thu Jan 13 19:53:21 2011
@@ -46,7 +46,6 @@ class SimpleTextFieldsWriter extends Fie
   public SimpleTextFieldsWriter(SegmentWriteState state) throws IOException {
     final String fileName = SimpleTextCodec.getPostingsFileName(state.segmentName, state.codecId);
     out = state.directory.createOutput(fileName);
-    state.flushedFiles.add(fileName);
   }
 
   private void write(String s) throws IOException {

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java Thu Jan 13 19:53:21 2011
@@ -31,8 +31,8 @@ import org.apache.lucene.index.codecs.Po
 import org.apache.lucene.index.codecs.PostingsReaderBase;
 import org.apache.lucene.index.codecs.TermsIndexWriterBase;
 import org.apache.lucene.index.codecs.TermsIndexReaderBase;
-import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter;
-import org.apache.lucene.index.codecs.FixedGapTermsIndexReader;
+import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter;
+import org.apache.lucene.index.codecs.VariableGapTermsIndexReader;
 import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
 import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
 import org.apache.lucene.store.Directory;
@@ -56,7 +56,7 @@ public class StandardCodec extends Codec
     TermsIndexWriterBase indexWriter;
     boolean success = false;
     try {
-      indexWriter = new FixedGapTermsIndexWriter(state);
+      indexWriter = new VariableGapTermsIndexWriter(state, new VariableGapTermsIndexWriter.EveryNTermSelector(state.termIndexInterval));
       success = true;
     } finally {
       if (!success) {
@@ -89,12 +89,11 @@ public class StandardCodec extends Codec
 
     boolean success = false;
     try {
-      indexReader = new FixedGapTermsIndexReader(state.dir,
-                                                       state.fieldInfos,
-                                                       state.segmentInfo.name,
-                                                       state.termsIndexDivisor,
-                                                       BytesRef.getUTF8SortedAsUnicodeComparator(),
-                                                       state.codecId);
+      indexReader = new VariableGapTermsIndexReader(state.dir,
+                                                    state.fieldInfos,
+                                                    state.segmentInfo.name,
+                                                    state.termsIndexDivisor,
+                                                    state.codecId);
       success = true;
     } finally {
       if (!success) {
@@ -136,7 +135,7 @@ public class StandardCodec extends Codec
   public void files(Directory dir, SegmentInfo segmentInfo, String id, Set<String> files) throws IOException {
     StandardPostingsReader.files(dir, segmentInfo, id, files);
     PrefixCodedTermsReader.files(dir, segmentInfo, id, files);
-    FixedGapTermsIndexReader.files(dir, segmentInfo, id, files);
+    VariableGapTermsIndexReader.files(dir, segmentInfo, id, files);
   }
 
   @Override
@@ -148,6 +147,6 @@ public class StandardCodec extends Codec
     extensions.add(FREQ_EXTENSION);
     extensions.add(PROX_EXTENSION);
     PrefixCodedTermsReader.getExtensions(extensions);
-    FixedGapTermsIndexReader.getIndexExtensions(extensions);
+    VariableGapTermsIndexReader.getIndexExtensions(extensions);
   }
 }

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java Thu Jan 13 19:53:21 2011
@@ -26,8 +26,9 @@ import org.apache.lucene.index.FieldInfo
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.TermState;
+import org.apache.lucene.index.codecs.PrefixCodedTermState;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -83,20 +84,20 @@ public class StandardPostingsReader exte
   }
 
   // Must keep final because we do non-standard clone
-  private final static class DocTermState extends TermState {
+  private final static class StandardTermState extends PrefixCodedTermState {
     long freqOffset;
     long proxOffset;
     int skipOffset;
 
     public Object clone() {
-      DocTermState other = new DocTermState();
-      other.copy(this);
+      StandardTermState other = new StandardTermState();
+      other.copyFrom(this);
       return other;
     }
 
-    public void copy(TermState _other) {
-      super.copy(_other);
-      DocTermState other = (DocTermState) _other;
+    public void copyFrom(TermState _other) {
+      super.copyFrom(_other);
+      StandardTermState other = (StandardTermState) _other;
       freqOffset = other.freqOffset;
       proxOffset = other.proxOffset;
       skipOffset = other.skipOffset;
@@ -108,8 +109,8 @@ public class StandardPostingsReader exte
   }
 
   @Override
-  public TermState newTermState() {
-    return new DocTermState();
+  public PrefixCodedTermState newTermState() {
+    return new StandardTermState();
   }
 
   @Override
@@ -126,10 +127,9 @@ public class StandardPostingsReader exte
   }
 
   @Override
-  public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState termState, boolean isIndexTerm)
+  public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState termState, boolean isIndexTerm)
     throws IOException {
-
-    final DocTermState docTermState = (DocTermState) termState;
+    final StandardTermState docTermState = (StandardTermState) termState;
 
     if (isIndexTerm) {
       docTermState.freqOffset = termsIn.readVLong();
@@ -153,7 +153,7 @@ public class StandardPostingsReader exte
   }
     
   @Override
-  public DocsEnum docs(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsEnum reuse) throws IOException {
+  public DocsEnum docs(FieldInfo fieldInfo, PrefixCodedTermState termState, Bits skipDocs, DocsEnum reuse) throws IOException {
     SegmentDocsEnum docsEnum;
     if (reuse == null || !(reuse instanceof SegmentDocsEnum)) {
       docsEnum = new SegmentDocsEnum(freqIn);
@@ -166,11 +166,11 @@ public class StandardPostingsReader exte
         docsEnum = new SegmentDocsEnum(freqIn);
       }
     }
-    return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs);
+    return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs);
   }
 
   @Override
-  public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
+  public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, PrefixCodedTermState termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
     if (fieldInfo.omitTermFreqAndPositions) {
       return null;
     }
@@ -189,7 +189,7 @@ public class StandardPostingsReader exte
           docsEnum = new SegmentDocsAndPositionsAndPayloadsEnum(freqIn, proxIn);
         }
       }
-      return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs);
+      return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs);
     } else {
       SegmentDocsAndPositionsEnum docsEnum;
       if (reuse == null || !(reuse instanceof SegmentDocsAndPositionsEnum)) {
@@ -203,7 +203,7 @@ public class StandardPostingsReader exte
           docsEnum = new SegmentDocsAndPositionsEnum(freqIn, proxIn);
         }
       }
-      return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs);
+      return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs);
     }
   }
 
@@ -233,7 +233,7 @@ public class StandardPostingsReader exte
       this.freqIn = (IndexInput) freqIn.clone();
     }
 
-    public SegmentDocsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException {
+    public SegmentDocsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException {
       omitTF = fieldInfo.omitTermFreqAndPositions;
       if (omitTF) {
         freq = 1;
@@ -407,7 +407,7 @@ public class StandardPostingsReader exte
       this.proxIn = (IndexInput) proxIn.clone();
     }
 
-    public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException {
+    public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException {
       assert !fieldInfo.omitTermFreqAndPositions;
       assert !fieldInfo.storePayloads;
 
@@ -594,7 +594,7 @@ public class StandardPostingsReader exte
       this.proxIn = (IndexInput) proxIn.clone();
     }
 
-    public SegmentDocsAndPositionsAndPayloadsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException {
+    public SegmentDocsAndPositionsAndPayloadsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException {
       assert !fieldInfo.omitTermFreqAndPositions;
       assert fieldInfo.storePayloads;
       if (payload == null) {
@@ -785,6 +785,7 @@ public class StandardPostingsReader exte
       if (payloadLength > payload.bytes.length) {
         payload.grow(payloadLength);
       }
+
       proxIn.readBytes(payload.bytes, 0, payloadLength);
       payload.length = payloadLength;
       payloadPending = false;

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java Thu Jan 13 19:53:21 2011
@@ -61,14 +61,12 @@ public final class StandardPostingsWrite
   public StandardPostingsWriter(SegmentWriteState state) throws IOException {
     super();
     String fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.FREQ_EXTENSION);
-    state.flushedFiles.add(fileName);
     freqOut = state.directory.createOutput(fileName);
 
     if (state.fieldInfos.hasProx()) {
       // At least one field does not omit TF, so create the
       // prox file
       fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.PROX_EXTENSION);
-      state.flushedFiles.add(fileName);
       proxOut = state.directory.createOutput(fileName);
     } else {
       // Every field omits TF so we will write no prox file
@@ -157,7 +155,7 @@ public final class StandardPostingsWrite
 
     final int delta = position - lastPosition;
     
-    assert delta > 0 || position == 0 || position == -1: "position=" + position + " lastPosition=" + lastPosition;            // not quite right (if pos=0 is repeated twice we don't catch it)
+    assert delta > 0 || position == 0: "position=" + position + " lastPosition=" + lastPosition;            // not quite right (if pos=0 is repeated twice we don't catch it)
 
     lastPosition = position;
 

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/BooleanQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/BooleanQuery.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/BooleanQuery.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/BooleanQuery.java Thu Jan 13 19:53:21 2011
@@ -18,6 +18,7 @@ package org.apache.lucene.search;
  */
 
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.util.ToStringUtils;
 import org.apache.lucene.search.BooleanClause.Occur;
@@ -62,10 +63,12 @@ public class BooleanQuery extends Query 
   }
 
   private ArrayList<BooleanClause> clauses = new ArrayList<BooleanClause>();
-  private boolean disableCoord;
+  private final boolean disableCoord;
 
   /** Constructs an empty boolean query. */
-  public BooleanQuery() {}
+  public BooleanQuery() {
+    disableCoord = false;
+  }
 
   /** Constructs an empty boolean query.
    *
@@ -86,22 +89,6 @@ public class BooleanQuery extends Query 
    */
   public boolean isCoordDisabled() { return disableCoord; }
 
-  // Implement coord disabling.
-  // Inherit javadoc.
-  @Override
-  public Similarity getSimilarity(Searcher searcher) {
-    Similarity result = super.getSimilarity(searcher);
-    if (disableCoord) {                           // disable coord as requested
-      result = new SimilarityDelegator(result) {
-          @Override
-          public float coord(int overlap, int maxOverlap) {
-            return 1.0f;
-          }
-        };
-    }
-    return result;
-  }
-
   /**
    * Specifies a minimum number of the optional BooleanClauses
    * which must be satisfied.
@@ -178,10 +165,12 @@ public class BooleanQuery extends Query 
     protected Similarity similarity;
     protected ArrayList<Weight> weights;
     protected int maxCoord;  // num optional + num required
+    private final boolean disableCoord;
 
-    public BooleanWeight(Searcher searcher)
+    public BooleanWeight(IndexSearcher searcher, boolean disableCoord)
       throws IOException {
       this.similarity = getSimilarity(searcher);
+      this.disableCoord = disableCoord;
       weights = new ArrayList<Weight>(clauses.size());
       for (int i = 0 ; i < clauses.size(); i++) {
         BooleanClause c = clauses.get(i);
@@ -223,7 +212,7 @@ public class BooleanQuery extends Query 
     }
 
     @Override
-    public Explanation explain(IndexReader reader, int doc)
+    public Explanation explain(AtomicReaderContext context, int doc)
       throws IOException {
       final int minShouldMatch =
         BooleanQuery.this.getMinimumNumberShouldMatch();
@@ -237,7 +226,7 @@ public class BooleanQuery extends Query 
       for (Iterator<Weight> wIter = weights.iterator(); wIter.hasNext();) {
         Weight w = wIter.next();
         BooleanClause c = cIter.next();
-        if (w.scorer(reader, true, true) == null) {
+        if (w.scorer(context, ScorerContext.def().scoreDocsInOrder(true).topScorer(true)) == null) {
           if (c.isRequired()) {
             fail = true;
             Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");
@@ -245,7 +234,7 @@ public class BooleanQuery extends Query 
           }
           continue;
         }
-        Explanation e = w.explain(reader, doc);
+        Explanation e = w.explain(context, doc);
         if (e.isMatch()) {
           if (!c.isProhibited()) {
             sumExpl.addDetail(e);
@@ -284,10 +273,10 @@ public class BooleanQuery extends Query 
       sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE);
       sumExpl.setValue(sum);
       
-      float coordFactor = similarity.coord(coord, maxCoord);
-      if (coordFactor == 1.0f)                      // coord is no-op
+      final float coordFactor = disableCoord ? 1.0f : similarity.coord(coord, maxCoord);
+      if (coordFactor == 1.0f) {
         return sumExpl;                             // eliminate wrapper
-      else {
+      } else {
         ComplexExplanation result = new ComplexExplanation(sumExpl.isMatch(),
                                                            sum*coordFactor,
                                                            "product of:");
@@ -299,7 +288,7 @@ public class BooleanQuery extends Query 
     }
 
     @Override
-    public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer)
+    public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext)
         throws IOException {
       List<Scorer> required = new ArrayList<Scorer>();
       List<Scorer> prohibited = new ArrayList<Scorer>();
@@ -307,7 +296,7 @@ public class BooleanQuery extends Query 
       Iterator<BooleanClause> cIter = clauses.iterator();
       for (Weight w  : weights) {
         BooleanClause c =  cIter.next();
-        Scorer subScorer = w.scorer(reader, true, false);
+        Scorer subScorer = w.scorer(context, ScorerContext.def());
         if (subScorer == null) {
           if (c.isRequired()) {
             return null;
@@ -322,8 +311,8 @@ public class BooleanQuery extends Query 
       }
       
       // Check if we can return a BooleanScorer
-      if (!scoreDocsInOrder && topScorer && required.size() == 0 && prohibited.size() < 32) {
-        return new BooleanScorer(this, similarity, minNrShouldMatch, optional, prohibited, maxCoord);
+      if (!scorerContext.scoreDocsInOrder && scorerContext.topScorer && required.size() == 0 && prohibited.size() < 32) {
+        return new BooleanScorer(this, disableCoord, similarity, minNrShouldMatch, optional, prohibited, maxCoord);
       }
       
       if (required.size() == 0 && optional.size() == 0) {
@@ -337,7 +326,7 @@ public class BooleanQuery extends Query 
       }
       
       // Return a BooleanScorer2
-      return new BooleanScorer2(this, similarity, minNrShouldMatch, required, prohibited, optional, maxCoord);
+      return new BooleanScorer2(this, disableCoord, similarity, minNrShouldMatch, required, prohibited, optional, maxCoord);
     }
     
     @Override
@@ -362,8 +351,8 @@ public class BooleanQuery extends Query 
   }
 
   @Override
-  public Weight createWeight(Searcher searcher) throws IOException {
-    return new BooleanWeight(searcher);
+  public Weight createWeight(IndexSearcher searcher) throws IOException {
+    return new BooleanWeight(searcher, disableCoord);
   }
 
   @Override

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/BooleanScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/BooleanScorer.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/BooleanScorer.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/BooleanScorer.java Thu Jan 13 19:53:21 2011
@@ -20,7 +20,7 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.List;
 
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.search.BooleanClause.Occur;
 
 /* Description from Doug Cutting (excerpted from
@@ -92,7 +92,7 @@ final class BooleanScorer extends Scorer
     }
     
     @Override
-    public void setNextReader(IndexReader reader, int docBase) {
+    public void setNextReader(AtomicReaderContext context) {
       // not needed by this implementation
     }
     
@@ -197,9 +197,9 @@ final class BooleanScorer extends Scorer
   private Bucket current;
   private int doc = -1;
   
-  BooleanScorer(Weight weight, Similarity similarity, int minNrShouldMatch,
+  BooleanScorer(Weight weight, boolean disableCoord, Similarity similarity, int minNrShouldMatch,
       List<Scorer> optionalScorers, List<Scorer> prohibitedScorers, int maxCoord) throws IOException {
-    super(similarity, weight);
+    super(null, weight);   // Similarity not used
     this.minNrShouldMatch = minNrShouldMatch;
 
     if (optionalScorers != null && optionalScorers.size() > 0) {
@@ -222,15 +222,14 @@ final class BooleanScorer extends Scorer
     }
 
     coordFactors = new float[optionalScorers.size() + 1];
-    Similarity sim = getSimilarity();
     for (int i = 0; i < coordFactors.length; i++) {
-      coordFactors[i] = sim.coord(i, maxCoord); 
+      coordFactors[i] = disableCoord ? 1.0f : similarity.coord(i, maxCoord); 
     }
   }
 
   // firstDocID is ignored since nextDoc() initializes 'current'
   @Override
-  protected boolean score(Collector collector, int max, int firstDocID) throws IOException {
+  public boolean score(Collector collector, int max, int firstDocID) throws IOException {
     boolean more;
     Bucket tmp;
     BucketScorer bs = new BucketScorer();

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java Thu Jan 13 19:53:21 2011
@@ -42,14 +42,12 @@ class BooleanScorer2 extends Scorer {
     int maxCoord = 0; // to be increased for each non prohibited scorer
     int nrMatchers; // to be increased by score() of match counting scorers.
     
-    void init() { // use after all scorers have been added.
+    void init(Similarity sim, boolean disableCoord) { // use after all scorers have been added.
       coordFactors = new float[optionalScorers.size() + requiredScorers.size() + 1];
-      Similarity sim = getSimilarity();
       for (int i = 0; i < coordFactors.length; i++) {
-        coordFactors[i] = sim.coord(i, maxCoord);
+        coordFactors[i] = disableCoord ? 1.0f : sim.coord(i, maxCoord);
       }
     }
-    
   }
 
   private final Coordinator coordinator;
@@ -82,9 +80,9 @@ class BooleanScorer2 extends Scorer {
    * @param optional
    *          the list of optional scorers.
    */
-  public BooleanScorer2(Weight weight, Similarity similarity, int minNrShouldMatch,
+  public BooleanScorer2(Weight weight, boolean disableCoord, Similarity similarity, int minNrShouldMatch,
       List<Scorer> required, List<Scorer> prohibited, List<Scorer> optional, int maxCoord) throws IOException {
-    super(similarity, weight);
+    super(null, weight);   // Similarity not used
     if (minNrShouldMatch < 0) {
       throw new IllegalArgumentException("Minimum number of optional scorers should not be negative");
     }
@@ -96,8 +94,8 @@ class BooleanScorer2 extends Scorer {
     requiredScorers = required;    
     prohibitedScorers = prohibited;
     
-    coordinator.init();
-    countingSumScorer = makeCountingSumScorer();
+    coordinator.init(similarity, disableCoord);
+    countingSumScorer = makeCountingSumScorer(disableCoord, similarity);
   }
   
   /** Count a scorer as a single match. */
@@ -109,7 +107,7 @@ class BooleanScorer2 extends Scorer {
     private float lastDocScore = Float.NaN;
 
     SingleMatchScorer(Scorer scorer) {
-      super(scorer.getSimilarity());
+      super(null); // No similarity used.
       this.scorer = scorer;
     }
 
@@ -164,12 +162,12 @@ class BooleanScorer2 extends Scorer {
     };
   }
 
-  private static final Similarity defaultSimilarity = Similarity.getDefault();
-
-  private Scorer countingConjunctionSumScorer(List<Scorer> requiredScorers) throws IOException {
+  private Scorer countingConjunctionSumScorer(boolean disableCoord,
+                                              Similarity similarity,
+                                              List<Scorer> requiredScorers) throws IOException {
     // each scorer from the list counted as a single matcher
     final int requiredNrMatchers = requiredScorers.size();
-    return new ConjunctionScorer(defaultSimilarity, requiredScorers) {
+    return new ConjunctionScorer(disableCoord ? 1.0f : similarity.coord(requiredScorers.size(), requiredScorers.size()), requiredScorers) {
       private int lastScoredDoc = -1;
       // Save the score of lastScoredDoc, so that we don't compute it more than
       // once in score().
@@ -192,8 +190,10 @@ class BooleanScorer2 extends Scorer {
     };
   }
 
-  private Scorer dualConjunctionSumScorer(Scorer req1, Scorer req2) throws IOException { // non counting.
-    return new ConjunctionScorer(defaultSimilarity, req1, req2);
+  private Scorer dualConjunctionSumScorer(boolean disableCoord,
+                                          Similarity similarity,
+                                          Scorer req1, Scorer req2) throws IOException { // non counting.
+    return new ConjunctionScorer(disableCoord ? 1.0f : similarity.coord(2, 2), req1, req2);
     // All scorers match, so defaultSimilarity always has 1 as
     // the coordination factor.
     // Therefore the sum of the scores of two scorers
@@ -203,13 +203,14 @@ class BooleanScorer2 extends Scorer {
   /** Returns the scorer to be used for match counting and score summing.
    * Uses requiredScorers, optionalScorers and prohibitedScorers.
    */
-  private Scorer makeCountingSumScorer() throws IOException { // each scorer counted as a single matcher
+  private Scorer makeCountingSumScorer(boolean disableCoord,
+                                       Similarity similarity) throws IOException { // each scorer counted as a single matcher
     return (requiredScorers.size() == 0)
-          ? makeCountingSumScorerNoReq()
-          : makeCountingSumScorerSomeReq();
+      ? makeCountingSumScorerNoReq(disableCoord, similarity)
+      : makeCountingSumScorerSomeReq(disableCoord, similarity);
   }
 
-  private Scorer makeCountingSumScorerNoReq() throws IOException { // No required scorers
+  private Scorer makeCountingSumScorerNoReq(boolean disableCoord, Similarity similarity) throws IOException { // No required scorers
     // minNrShouldMatch optional scorers are required, but at least 1
     int nrOptRequired = (minNrShouldMatch < 1) ? 1 : minNrShouldMatch;
     Scorer requiredCountingSumScorer;
@@ -217,24 +218,27 @@ class BooleanScorer2 extends Scorer {
       requiredCountingSumScorer = countingDisjunctionSumScorer(optionalScorers, nrOptRequired);
     else if (optionalScorers.size() == 1)
       requiredCountingSumScorer = new SingleMatchScorer(optionalScorers.get(0));
-    else
-      requiredCountingSumScorer = countingConjunctionSumScorer(optionalScorers);
+    else {
+      requiredCountingSumScorer = countingConjunctionSumScorer(disableCoord, similarity, optionalScorers);
+    }
     return addProhibitedScorers(requiredCountingSumScorer);
   }
 
-  private Scorer makeCountingSumScorerSomeReq() throws IOException { // At least one required scorer.
+  private Scorer makeCountingSumScorerSomeReq(boolean disableCoord, Similarity similarity) throws IOException { // At least one required scorer.
     if (optionalScorers.size() == minNrShouldMatch) { // all optional scorers also required.
       ArrayList<Scorer> allReq = new ArrayList<Scorer>(requiredScorers);
       allReq.addAll(optionalScorers);
-      return addProhibitedScorers(countingConjunctionSumScorer(allReq));
+      return addProhibitedScorers(countingConjunctionSumScorer(disableCoord, similarity, allReq));
     } else { // optionalScorers.size() > minNrShouldMatch, and at least one required scorer
       Scorer requiredCountingSumScorer =
             requiredScorers.size() == 1
             ? new SingleMatchScorer(requiredScorers.get(0))
-            : countingConjunctionSumScorer(requiredScorers);
+            : countingConjunctionSumScorer(disableCoord, similarity, requiredScorers);
       if (minNrShouldMatch > 0) { // use a required disjunction scorer over the optional scorers
         return addProhibitedScorers( 
                       dualConjunctionSumScorer( // non counting
+                              disableCoord,
+                              similarity,
                               requiredCountingSumScorer,
                               countingDisjunctionSumScorer(
                                       optionalScorers,
@@ -276,7 +280,7 @@ class BooleanScorer2 extends Scorer {
   }
   
   @Override
-  protected boolean score(Collector collector, int max, int firstDocID) throws IOException {
+  public boolean score(Collector collector, int max, int firstDocID) throws IOException {
     doc = firstDocID;
     collector.setScorer(this);
     while (doc < max) {

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/BoostAttribute.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/BoostAttribute.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/BoostAttribute.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/BoostAttribute.java Thu Jan 13 19:53:21 2011
@@ -21,7 +21,7 @@ import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeSource; // javadocs only
 import org.apache.lucene.index.TermsEnum; // javadocs only
 
-/** Add this {@link Attribute} to a {@link TermsEnum} returned by {@link MultiTermQuery#getTermsEnum(IndexReader,AttributeSource)}
+/** Add this {@link Attribute} to a {@link TermsEnum} returned by {@link MultiTermQuery#getTermsEnum(Terms,AttributeSource)}
  * and update the boost on each returned term. This enables to control the boost factor
  * for each matching term in {@link MultiTermQuery#SCORING_BOOLEAN_QUERY_REWRITE} or
  * {@link TopTermsRewrite} mode.

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java Thu Jan 13 19:53:21 2011
@@ -17,7 +17,7 @@ package org.apache.lucene.search;
 
 
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.util.Bits;
 
 import java.io.IOException;
@@ -61,8 +61,8 @@ public class CachingSpanFilter extends S
   }
 
   @Override
-  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
-    SpanFilterResult result = getCachedResult(reader);
+  public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+    SpanFilterResult result = getCachedResult(context.reader);
     return result != null ? result.getDocIdSet() : null;
   }
   
@@ -72,7 +72,7 @@ public class CachingSpanFilter extends S
   private SpanFilterResult getCachedResult(IndexReader reader) throws IOException {
 
     final Object coreKey = reader.getCoreCacheKey();
-    final Object delCoreKey = reader.hasDeletions() ? MultiFields.getDeletedDocs(reader) : coreKey;
+    final Object delCoreKey = reader.hasDeletions() ? reader.getDeletedDocs() : coreKey;
 
     SpanFilterResult result = cache.get(reader, coreKey, delCoreKey);
     if (result != null) {

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java Thu Jan 13 19:53:21 2011
@@ -23,7 +23,7 @@ import java.util.Map;
 import java.util.WeakHashMap;
 
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.util.OpenBitSetDISI;
 import org.apache.lucene.util.Bits;
 
@@ -38,6 +38,9 @@ import org.apache.lucene.util.Bits;
  * {@link DeletesMode#DYNAMIC}).
  */
 public class CachingWrapperFilter extends Filter {
+  // TODO: make this filter aware of ReaderContext. A cached filter could
+  // specify the actual reader's key or something similar to indicate on which
+  // level of the reader hierarchy it should be cached.
   Filter filter;
 
   /**
@@ -105,7 +108,7 @@ public class CachingWrapperFilter extend
           // deletions
           value = cache.get(coreKey);
           if (value != null) {
-            final Bits delDocs = MultiFields.getDeletedDocs(reader);
+            final Bits delDocs = reader.getDeletedDocs();
             if (delDocs != null) {
               value = mergeDeletes(delDocs, value);
             }
@@ -192,10 +195,10 @@ public class CachingWrapperFilter extend
   int hitCount, missCount;
 
   @Override
-  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
-
+  public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+    final IndexReader reader = context.reader;
     final Object coreKey = reader.getCoreCacheKey();
-    final Object delCoreKey = reader.hasDeletions() ? MultiFields.getDeletedDocs(reader) : coreKey;
+    final Object delCoreKey = reader.hasDeletions() ? reader.getDeletedDocs() : coreKey;
 
     DocIdSet docIdSet = cache.get(reader, coreKey, delCoreKey);
     if (docIdSet != null) {
@@ -206,7 +209,7 @@ public class CachingWrapperFilter extend
     missCount++;
 
     // cache miss
-    docIdSet = docIdSetToCache(filter.getDocIdSet(reader), reader);
+    docIdSet = docIdSetToCache(filter.getDocIdSet(context), reader);
 
     if (docIdSet != null) {
       cache.put(coreKey, delCoreKey, docIdSet);

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/Collector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/Collector.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/Collector.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/Collector.java Thu Jan 13 19:53:21 2011
@@ -19,7 +19,8 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.IndexReader.ReaderContext;
 
 /**
  * <p>Expert: Collectors are primarily meant to be used to
@@ -98,8 +99,8 @@ import org.apache.lucene.index.IndexRead
  *     bits.set(doc + docBase);
  *   }
  * 
- *   public void setNextReader(IndexReader reader, int docBase) {
- *     this.docBase = docBase;
+ *   public void setNextReader(AtomicReaderContext context) {
+ *     this.docBase = context.docBase;
  *   }
  * });
  * </pre>
@@ -136,24 +137,23 @@ public abstract class Collector {
    * 
    * <p>
    * Note: This is called in an inner search loop. For good search performance,
-   * implementations of this method should not call {@link Searcher#doc(int)} or
+   * implementations of this method should not call {@link IndexSearcher#doc(int)} or
    * {@link org.apache.lucene.index.IndexReader#document(int)} on every hit.
    * Doing so can slow searches by an order of magnitude or more.
    */
   public abstract void collect(int doc) throws IOException;
 
   /**
-   * Called before collecting from each IndexReader. All doc ids in
-   * {@link #collect(int)} will correspond to reader.
+   * Called before collecting from each {@link AtomicReaderContext}. All doc ids in
+   * {@link #collect(int)} will correspond to {@link ReaderContext#reader}.
    * 
-   * Add docBase to the current IndexReaders internal document id to re-base ids
-   * in {@link #collect(int)}.
+   * Add {@link AtomicReaderContext#docBase} to the current {@link ReaderContext#reader}'s
+   * internal document id to re-base ids in {@link #collect(int)}.
    * 
-   * @param reader
-   *          next IndexReader
-   * @param docBase
+   * @param context
+   *          next atomic reader context
    */
-  public abstract void setNextReader(IndexReader reader, int docBase) throws IOException;
+  public abstract void setNextReader(AtomicReaderContext context) throws IOException;
 
   /**
    * Return <code>true</code> if this collector does not

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java Thu Jan 13 19:53:21 2011
@@ -29,14 +29,14 @@ class ConjunctionScorer extends Scorer {
   private final float coord;
   private int lastDoc = -1;
 
-  public ConjunctionScorer(Similarity similarity, Collection<Scorer> scorers) throws IOException {
-    this(similarity, scorers.toArray(new Scorer[scorers.size()]));
+  public ConjunctionScorer(float coord, Collection<Scorer> scorers) throws IOException {
+    this(coord, scorers.toArray(new Scorer[scorers.size()]));
   }
 
-  public ConjunctionScorer(Similarity similarity, Scorer... scorers) throws IOException {
-    super(similarity);
+  public ConjunctionScorer(float coord, Scorer... scorers) throws IOException {
+    super(null);
     this.scorers = scorers;
-    coord = similarity.coord(scorers.length, scorers.length);
+    this.coord = coord;
     
     for (int i = 0; i < scorers.length; i++) {
       if (scorers[i].nextDoc() == NO_MORE_DOCS) {



Mime
View raw message