lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r1049178 [2/2] - in /lucene/dev/branches/bulkpostings/lucene: contrib/instantiated/src/java/org/apache/lucene/store/instantiated/ contrib/memory/src/java/org/apache/lucene/index/memory/ src/java/org/apache/lucene/index/ src/java/org/apache/...
Date Tue, 14 Dec 2010 17:18:02 GMT
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java Tue Dec 14 17:18:00 2010
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.Collection;
 
 import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.BulkPostingsEnum;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexFileNames;
@@ -65,7 +66,7 @@ public class SepPostingsReaderImpl exten
       skipIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION), readBufferSize);
 
       if (segmentInfo.getHasProx()) {
-        freqIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.FREQ_EXTENSION));
+        freqIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.FREQ_EXTENSION), readBufferSize);
         posIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.POS_EXTENSION), readBufferSize);
         payloadIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.PAYLOAD_EXTENSION), readBufferSize);
       } else {
@@ -169,36 +170,37 @@ public class SepPostingsReaderImpl exten
   public DocsEnum docs(FieldInfo fieldInfo, TermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
     final SepTermState termState = (SepTermState) _termState;
     SepDocsEnum docsEnum;
-    if (reuse == null || !(reuse instanceof SepDocsEnum)) {
+    if (reuse == null || !(reuse instanceof SepDocsEnum) || !((SepDocsEnum) reuse).canReuse(docIn)) {
       docsEnum = new SepDocsEnum();
     } else {
       docsEnum = (SepDocsEnum) reuse;
-      if (docsEnum.startDocIn != docIn) {
-        // If you are using ParellelReader, and pass in a
-        // reused DocsAndPositionsEnum, it could have come
-        // from another reader also using sep codec
-        docsEnum = new SepDocsEnum();        
-      }
     }
 
     return docsEnum.init(fieldInfo, termState, skipDocs);
   }
 
   @Override
+  public BulkPostingsEnum bulkPostings(FieldInfo fieldInfo, TermState _termState, BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException {
+    final SepTermState termState = (SepTermState) _termState;
+    SepBulkPostingsEnum postingsEnum;
+    if (reuse == null || !(reuse instanceof SepBulkPostingsEnum) || !((SepBulkPostingsEnum) reuse).canReuse(fieldInfo, docIn, doFreqs, doPositions)) {
+      postingsEnum = new SepBulkPostingsEnum(fieldInfo, doFreqs, doPositions);
+    } else {
+      postingsEnum = (SepBulkPostingsEnum) reuse;
+    }
+
+    return postingsEnum.init(termState);
+  }
+
+  @Override
   public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
     assert !fieldInfo.omitTermFreqAndPositions;
     final SepTermState termState = (SepTermState) _termState;
     SepDocsAndPositionsEnum postingsEnum;
-    if (reuse == null || !(reuse instanceof SepDocsAndPositionsEnum)) {
+    if (reuse == null || !(reuse instanceof SepDocsAndPositionsEnum) || !((SepDocsAndPositionsEnum) reuse).canReuse(docIn)) {
       postingsEnum = new SepDocsAndPositionsEnum();
     } else {
       postingsEnum = (SepDocsAndPositionsEnum) reuse;
-      if (postingsEnum.startDocIn != docIn) {
-        // If you are using ParellelReader, and pass in a
-        // reused DocsAndPositionsEnum, it could have come
-        // from another reader also using sep codec
-        postingsEnum = new SepDocsAndPositionsEnum();        
-      }
     }
 
     return postingsEnum.init(fieldInfo, termState, skipDocs);
@@ -209,14 +211,19 @@ public class SepPostingsReaderImpl exten
     int doc;
     int count;
     int freq;
-    long freqStart;
 
     // TODO: -- should we do omitTF with 2 different enum classes?
     private boolean omitTF;
     private boolean storePayloads;
     private Bits skipDocs;
-    private final IntIndexInput.Reader docReader;
-    private final IntIndexInput.Reader freqReader;
+    private final BulkPostingsEnum.BlockReader docReader;
+    private final int[] docDeltaBuffer;
+    private int docDeltaUpto;
+    private int docDeltaLimit;
+    private final BulkPostingsEnum.BlockReader freqReader;
+    private final int[] freqBuffer;
+    private int freqUpto;
+    private int freqLimit;
     private long skipOffset;
 
     private final IntIndexInput.Index docIndex;
@@ -224,21 +231,22 @@ public class SepPostingsReaderImpl exten
     private final IntIndexInput.Index posIndex;
     private final IntIndexInput startDocIn;
 
-    // TODO: -- should we do hasProx with 2 different enum classes?
-
     boolean skipped;
     SepSkipListReader skipper;
 
-    SepDocsEnum() throws IOException {
+    public SepDocsEnum() throws IOException {
       startDocIn = docIn;
       docReader = docIn.reader();
+      docDeltaBuffer = docReader.getBuffer();
       docIndex = docIn.index();
       if (freqIn != null) {
         freqReader = freqIn.reader();
+        freqBuffer = freqReader.getBuffer();
         freqIndex = freqIn.index();
       } else {
         freqReader = null;
         freqIndex = null;
+        freqBuffer = null;
       }
       if (posIn != null) {
         posIndex = posIn.index();                 // only init this so skipper can read it
@@ -247,6 +255,10 @@ public class SepPostingsReaderImpl exten
       }
     }
 
+    // nocommit -- somehow we have to prevent re-decode of
+    // the same block if we have just .next()'d to next term
+    // in the terms dict -- this is an O(N^2) cost to eg
+    // TermRangeQuery when it steps through low freq terms!!
     SepDocsEnum init(FieldInfo fieldInfo, SepTermState termState, Bits skipDocs) throws IOException {
       this.skipDocs = skipDocs;
       omitTF = fieldInfo.omitTermFreqAndPositions;
@@ -256,42 +268,82 @@ public class SepPostingsReaderImpl exten
       // skipped consuming the previous docs?
       docIndex.set(termState.docIndex);
       docIndex.seek(docReader);
+      docDeltaLimit = docReader.end();
+      docDeltaUpto = docReader.offset();
+      if (docDeltaUpto >= docDeltaLimit) {
+        docDeltaLimit = docReader.fill();
+      }
 
       if (!omitTF) {
         freqIndex.read(docReader, true);
         freqIndex.seek(freqReader);
+        freqUpto = freqReader.offset();
+        freqLimit = freqReader.end();
+        if (freqUpto >= freqLimit) {
+          freqLimit = freqReader.fill();
+        }
+        //System.out.println("  freqIndex=" + freqIndex + " posIndex=" + posIndex);
         
         posIndex.read(docReader, true);
+        // nocommit -- only store this if storePayloads is true
         // skip payload offset
         docReader.readVLong();
       } else {
         freq = 1;
       }
+
       skipOffset = docReader.readVLong();
 
+      docDeltaUpto = docReader.offset();
+      docDeltaLimit = docReader.end();
+
       docFreq = termState.docFreq;
+      assert docFreq > 0;
       count = 0;
       doc = 0;
       skipped = false;
+      //System.out.println("  docFreq=" + docFreq);
 
       return this;
     }
 
+    public boolean canReuse(IntIndexInput docsIn) {
+      return startDocIn == docsIn;
+    }
+
     @Override
     public int nextDoc() throws IOException {
+      //System.out.println("  sep.nextDoc");
 
       while(true) {
         if (count == docFreq) {
           return doc = NO_MORE_DOCS;
         }
 
+        assert docDeltaUpto <= docDeltaLimit: "docDeltaUpto=" + docDeltaUpto + " docDeltaLimit=" + docDeltaLimit;
+
+        if (docDeltaUpto == docDeltaLimit) {
+          // refill
+          //System.out.println("    fill docs");
+          docDeltaLimit = docReader.fill();
+          docDeltaUpto = 0;
+        }
+
         count++;
 
         // Decode next doc
-        doc += docReader.next();
+        doc += docDeltaBuffer[docDeltaUpto++];
+        //System.out.println("    doc="+ doc + " docDeltaUpto=" + (docDeltaUpto-1) + " skipDocs=" + skipDocs + " deleted?=" + (skipDocs != null && skipDocs.get(doc)));
           
         if (!omitTF) {
-          freq = freqReader.next();
+          if (freqUpto == freqLimit) {
+            // refill
+            //System.out.println("    fill freqs");
+            freqLimit = freqReader.fill();
+            freqUpto = 0;
+          }
+
+          freq = freqBuffer[freqUpto++];
         }
 
         if (skipDocs == null || !skipDocs.get(doc)) {
@@ -303,30 +355,6 @@ public class SepPostingsReaderImpl exten
     }
 
     @Override
-    public int read() throws IOException {
-      // TODO: -- switch to bulk read api in IntIndexInput
-      final int[] docs = bulkResult.docs.ints;
-      final int[] freqs = bulkResult.freqs.ints;
-      int i = 0;
-      final int length = docs.length;
-      while (i < length && count < docFreq) {
-        count++;
-        // manually inlined call to next() for speed
-        doc += docReader.next();
-        if (!omitTF) {
-          freq = freqReader.next();
-        }
-
-        if (skipDocs == null || !skipDocs.get(doc)) {
-          docs[i] = doc;
-          freqs[i] = freq;
-          i++;
-        }
-      }
-      return i;
-    }
-
-    @Override
     public int freq() {
       return freq;
     }
@@ -338,9 +366,11 @@ public class SepPostingsReaderImpl exten
 
     @Override
     public int advance(int target) throws IOException {
+      //System.out.println("SepDocsEnum.advance target=" + target);
 
       // TODO: jump right to next() if target is < X away
       // from where we are now?
+      //System.out.println("SepDocsEnum.advance target=" + target);
 
       if (docFreq >= skipInterval) {
 
@@ -349,6 +379,7 @@ public class SepPostingsReaderImpl exten
 
         if (skipper == null) {
           // This DocsEnum has never done any skipping
+          //System.out.println("  init skipper");
           skipper = new SepSkipListReader((IndexInput) skipIn.clone(),
                                           freqIn,
                                           docIn,
@@ -358,6 +389,7 @@ public class SepPostingsReaderImpl exten
         }
 
         if (!skipped) {
+          //System.out.println("  init skipper2");
           // We haven't yet skipped for this posting
           skipper.init(skipOffset,
                        docIndex,
@@ -374,14 +406,25 @@ public class SepPostingsReaderImpl exten
         final int newCount = skipper.skipTo(target); 
 
         if (newCount > count) {
-
           // Skipper did move
           if (!omitTF) {
             skipper.getFreqIndex().seek(freqReader);
+            freqUpto = freqReader.offset();
+            freqLimit = freqReader.end();
+            if (freqUpto >= freqLimit) {
+              freqLimit = freqReader.fill();
+            }
           }
           skipper.getDocIndex().seek(docReader);
+          docDeltaUpto = docReader.offset();
+          docDeltaLimit = docReader.end();
+          if (docDeltaUpto >= docDeltaLimit) {
+            docDeltaLimit = docReader.fill();
+          }
+
           count = newCount;
           doc = skipper.getDoc();
+          //System.out.println("  did move count=" + newCount + " doc=" + doc);
         }
       }
         
@@ -401,91 +444,148 @@ public class SepPostingsReaderImpl exten
     int doc;
     int count;
     int freq;
-    long freqStart;
 
     private boolean storePayloads;
     private Bits skipDocs;
-    private final IntIndexInput.Reader docReader;
-    private final IntIndexInput.Reader freqReader;
-    private final IntIndexInput.Reader posReader;
-    private final IndexInput payloadIn;
+    private final BulkPostingsEnum.BlockReader docReader;
+    private final int[] docDeltaBuffer;
+    private int docDeltaUpto;
+    private int docDeltaLimit;
+    private final BulkPostingsEnum.BlockReader freqReader;
+    private final int[] freqBuffer;
+    private int freqUpto;
+    private int freqLimit;
+    private final BulkPostingsEnum.BlockReader posReader;
+    private final int[] posBuffer;
+    private int posUpto;
+    private int posLimit;
     private long skipOffset;
+    private long payloadOffset;
+
+    private final IndexInput payloadIn;
 
     private final IntIndexInput.Index docIndex;
     private final IntIndexInput.Index freqIndex;
     private final IntIndexInput.Index posIndex;
     private final IntIndexInput startDocIn;
 
-    private long payloadOffset;
-
     private int pendingPosCount;
     private int position;
     private int payloadLength;
     private long pendingPayloadBytes;
-
-    private boolean skipped;
-    private SepSkipListReader skipper;
     private boolean payloadPending;
     private boolean posSeekPending;
 
-    SepDocsAndPositionsEnum() throws IOException {
+    boolean skipped;
+    SepSkipListReader skipper;
+
+    public SepDocsAndPositionsEnum() throws IOException {
       startDocIn = docIn;
       docReader = docIn.reader();
+      docDeltaBuffer = docReader.getBuffer();
       docIndex = docIn.index();
       freqReader = freqIn.reader();
+      freqBuffer = freqReader.getBuffer();
       freqIndex = freqIn.index();
       posReader = posIn.reader();
+      posBuffer = posReader.getBuffer();
       posIndex = posIn.index();
       payloadIn = (IndexInput) SepPostingsReaderImpl.this.payloadIn.clone();
     }
 
+    // nocommit -- somehow we have to prevent re-decode of
+    // the same block if we have just .next()'d to next term
+    // in the terms dict -- this is an O(N^2) cost to eg
+    // TermRangeQuery when it steps through low freq terms!!
     SepDocsAndPositionsEnum init(FieldInfo fieldInfo, SepTermState termState, Bits skipDocs) throws IOException {
       this.skipDocs = skipDocs;
+      //System.out.println("sep d&p init");
+      assert !fieldInfo.omitTermFreqAndPositions;
       storePayloads = fieldInfo.storePayloads;
 
       // TODO: can't we only do this if consumer
       // skipped consuming the previous docs?
       docIndex.set(termState.docIndex);
+      // nocommit -- verify, during merge, this seek is
+      // sometimes w/in block:
       docIndex.seek(docReader);
+      docDeltaLimit = docReader.end();
+      docDeltaUpto = docReader.offset();
+      if (docDeltaUpto >= docDeltaLimit) {
+        docDeltaLimit = docReader.fill();
+      }
 
       freqIndex.read(docReader, true);
       freqIndex.seek(freqReader);
+      freqLimit = freqReader.end();
+      freqUpto = freqReader.offset();
+      if (freqUpto >= freqLimit) {
+        //System.out.println("  re-fill freqs freqMax=" + freqLimit);
+        freqLimit = freqReader.fill();
+      }
+      //System.out.println("  freqIndex=" + freqIndex);
 
       posIndex.read(docReader, true);
       posSeekPending = true;
       payloadPending = false;
 
       payloadOffset = docReader.readVLong();
+      //System.out.println("  payloadOffset=" + payloadOffset);
       skipOffset = docReader.readVLong();
+      //System.out.println("  skipOffset=" + skipOffset);
+
+      docDeltaLimit = docReader.end();
+      docDeltaUpto = docReader.offset();
+      /*
+      if (docDeltaUpto >= docDeltaLimit) {
+        // nocommit -- needed anymore?
+        docDeltaLimit = docReader.fill();
+        docDeltaUpto = 0;
+      }
+      */
 
       docFreq = termState.docFreq;
+      assert docFreq > 0;
       count = 0;
       doc = 0;
       pendingPosCount = 0;
       pendingPayloadBytes = 0;
       skipped = false;
 
+      //System.out.println("  docUpto=" + docDeltaUpto + " docMax=" + docDeltaLimit + " freqUpto=" + freqUpto + " freqMax=" + freqLimit);
+
       return this;
     }
 
+    public boolean canReuse(IntIndexInput docsIn) {
+      return startDocIn == docsIn;
+    }
+
     @Override
     public int nextDoc() throws IOException {
-
       while(true) {
         if (count == docFreq) {
           return doc = NO_MORE_DOCS;
         }
 
-        count++;
+        if (docDeltaUpto == docDeltaLimit) {
+          // refill
+          docDeltaLimit = docReader.fill();
+          docDeltaUpto = 0;
+        }
 
-        // TODO: maybe we should do the 1-bit trick for encoding
-        // freq=1 case?
+        count++;
 
         // Decode next doc
-        doc += docReader.next();
+        doc += docDeltaBuffer[docDeltaUpto++];
           
-        freq = freqReader.next();
+        if (freqUpto == freqLimit) {
+          // refill
+          freqLimit = freqReader.fill();
+          freqUpto = 0;
+        }
 
+        freq = freqBuffer[freqUpto++];
         pendingPosCount += freq;
 
         if (skipDocs == null || !skipDocs.get(doc)) {
@@ -525,6 +625,7 @@ public class SepPostingsReaderImpl exten
                                           docIn,
                                           posIn,
                                           maxSkipLevels, skipInterval);
+
         }
 
         if (!skipped) {
@@ -536,7 +637,6 @@ public class SepPostingsReaderImpl exten
                        payloadOffset,
                        docFreq,
                        storePayloads);
-
           skipped = true;
         }
 
@@ -546,13 +646,24 @@ public class SepPostingsReaderImpl exten
 
           // Skipper did move
           skipper.getFreqIndex().seek(freqReader);
+          freqUpto = freqReader.offset();
+          freqLimit = freqReader.end();
+          if (freqUpto >= freqLimit) {
+            freqLimit = freqReader.fill();
+          }
+
           skipper.getDocIndex().seek(docReader);
-          //skipper.getPosIndex().seek(posReader);
+          docDeltaUpto = docReader.offset();
+          docDeltaLimit = docReader.end();
+          if (docDeltaUpto >= docDeltaLimit) {
+            docDeltaLimit = docReader.fill();
+          }
+
           posIndex.set(skipper.getPosIndex());
           posSeekPending = true;
           count = newCount;
           doc = skipper.getDoc();
-          //payloadIn.seek(skipper.getPayloadPointer());
+
           payloadOffset = skipper.getPayloadPointer();
           pendingPosCount = 0;
           pendingPayloadBytes = 0;
@@ -575,6 +686,11 @@ public class SepPostingsReaderImpl exten
     public int nextPosition() throws IOException {
       if (posSeekPending) {
         posIndex.seek(posReader);
+        posLimit = posReader.end();
+        posUpto = posReader.offset();
+        if (posUpto >= posLimit) {
+          posLimit = posReader.fill();
+        }
         payloadIn.seek(payloadOffset);
         posSeekPending = false;
       }
@@ -582,10 +698,12 @@ public class SepPostingsReaderImpl exten
       // scan over any docs that were iterated without their
       // positions
       while (pendingPosCount > freq) {
-        final int code = posReader.next();
+
+        final int code = nextPosInt();
+
         if (storePayloads && (code & 1) != 0) {
           // Payload length has changed
-          payloadLength = posReader.next();
+          payloadLength = nextPosInt();
           assert payloadLength >= 0;
         }
         pendingPosCount--;
@@ -593,11 +711,12 @@ public class SepPostingsReaderImpl exten
         pendingPayloadBytes += payloadLength;
       }
 
-      final int code = posReader.next();
+      final int code = nextPosInt();
+
       if (storePayloads) {
         if ((code & 1) != 0) {
           // Payload length has changed
-          payloadLength = posReader.next();
+          payloadLength = nextPosInt();
           assert payloadLength >= 0;
         }
         position += code >> 1;
@@ -612,6 +731,14 @@ public class SepPostingsReaderImpl exten
       return position;
     }
 
+    private int nextPosInt() throws IOException {
+      if (posUpto == posLimit) {
+        posLimit = posReader.fill();
+        posUpto = 0;
+      }
+      return posBuffer[posUpto++];
+    }
+
     private BytesRef payload;
 
     @Override
@@ -645,4 +772,261 @@ public class SepPostingsReaderImpl exten
       return payloadPending && payloadLength > 0;
     }
   }
+
+  class SepBulkPostingsEnum extends BulkPostingsEnum {
+    private int docFreq;
+
+    private final BulkPostingsEnum.BlockReader docReader;
+    private final IntIndexInput.Index docIndex;
+
+    private final BulkPostingsEnum.BlockReader freqReader;
+    private final IntIndexInput.Index freqIndex;
+
+    private final BulkPostingsEnum.BlockReader posReader;
+    private final IntIndexInput.Index posIndex;
+
+    private final boolean storePayloads;
+    private final boolean omitTF;
+    private long skipOffset;
+
+    private final IntIndexInput startDocIn;
+
+    private boolean skipped;
+    private SepSkipListReader skipper;
+
+    public SepBulkPostingsEnum(FieldInfo fieldInfo, boolean doFreq, boolean doPos) throws IOException {
+      this.storePayloads = fieldInfo.storePayloads;
+      this.omitTF = fieldInfo.omitTermFreqAndPositions;
+      startDocIn = docIn;
+      docReader = docIn.reader();
+      docIndex = docIn.index();
+
+      if (doFreq && !omitTF) {
+        freqReader = freqIn.reader();
+      } else {
+        freqReader = null;
+      }
+
+      if (doPos && !omitTF) {
+        if (storePayloads) {
+          // Must rewrite each posDelta:
+          posReader = new PosPayloadReader(posIn.reader());
+        } else {
+          // Pass through
+          posReader = posIn.reader();
+        }
+      } else {
+        posReader = null;
+      }
+
+      if (!omitTF) {
+        // we have to pull these even if doFreq is false
+        // just so we can decode the index from the docs
+        // file
+        freqIndex = freqIn.index();
+        posIndex = posIn.index();
+      } else {
+        posIndex = null;
+        freqIndex = null;
+      }
+    }
+
+    public boolean canReuse(FieldInfo fieldInfo, IntIndexInput docIn, boolean doFreq, boolean doPos) {
+      return fieldInfo.storePayloads == storePayloads &&
+        startDocIn == docIn &&
+        (freqReader != null || !doFreq) &&
+        (posReader != null || !doPos);
+    }
+
+    // nocommit -- make sure this is tested!!
+
+    // Only used when payloads were stored -- we cannot do
+    // pass-through read for this since the payload lengths
+    // are also encoded into the position deltas
+    private final class PosPayloadReader extends BulkPostingsEnum.BlockReader {
+      final BulkPostingsEnum.BlockReader other;
+      private int pendingOffset;
+      private int limit;
+      private boolean skipNext;
+
+      public PosPayloadReader(BulkPostingsEnum.BlockReader other) {
+        this.other = other;
+      }
+
+      void doAfterSeek() {}
+
+      @Override
+      public int[] getBuffer() {
+        return other.getBuffer();
+      }
+
+      // nocommit -- make sure this works correctly in the
+      // "reuse"/seek case
+      @Override
+      public int offset() {
+        pendingOffset = other.offset();
+        return 0;
+      }
+
+      @Override
+      public void setOffset(int offset) {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public int fill() throws IOException {
+        // Translate code back to pos deltas, and filter out
+        // any changes in payload length.  NOTE: this is a
+        // perf hit on indices that encode payloads, even if
+        // they use "normal" positional queries
+        final int otherLimit = other.fill();
+        limit = 0;
+        final int[] buffer = other.getBuffer();
+        for(int i=pendingOffset;i<otherLimit;i++) {
+          if (skipNext) {
+            skipNext = false;
+          } else {
+            final int code = buffer[i];
+            buffer[limit++] = code >>> 1;
+            if ((code & 1) != 0) {
+              // skip the payload length
+              skipNext = true;
+            }
+          }
+        }
+        pendingOffset = 0;
+
+        return limit;
+      }
+
+      @Override
+      public int end() {
+        return limit;
+      }
+    }
+
+    /** Position readers to the specified term */
+    SepBulkPostingsEnum init(SepTermState termState) throws IOException {
+
+      // nocommit -- make sure seek w/in buffer is efficient
+      // here:
+
+      // TODO: can't we only do this if consumer
+      // skipped consuming the previous docs?
+      docIndex.set(termState.docIndex);
+      docIndex.seek(docReader);
+      //System.out.println("sep init offset=" + docReader.offset() + " limit=" + docReader.end() + " omitTF=" + omitTF);
+      //System.out.println("  v[0]=" + docReader.getBuffer()[0]);
+
+      if (!omitTF) {
+        freqIndex.read(docReader, true);
+        if (freqReader != null) {
+          freqIndex.seek(freqReader);
+        }
+        posIndex.read(docReader, true);
+        // skip payload offset -- nocommit only store this
+        // if field has payloads
+        docReader.readVLong();
+      }
+
+      skipOffset = docReader.readVLong();
+      //System.out.println("skipOffset=" + skipOffset);
+
+      if (posReader != null) {
+        if (storePayloads) {
+          PosPayloadReader posPayloadReader = (PosPayloadReader) posReader;
+          posIndex.seek(posPayloadReader.other);
+          posPayloadReader.doAfterSeek();
+        } else {
+          posIndex.seek(posReader);
+        }
+      }
+
+      if (docReader.offset() >= docReader.end()) {
+        docReader.fill();
+        docReader.setOffset(0);
+      }
+
+      docFreq = termState.docFreq;
+      skipped = false;
+
+      return this;
+    }
+
+    @Override
+    public BulkPostingsEnum.BlockReader getDocDeltasReader() {
+      // Maximize perf -- just pass through the underlying
+      // intblock reader:
+      return docReader;
+    }
+
+    @Override
+    public BulkPostingsEnum.BlockReader getFreqsReader() {
+      // Maximize perf -- just pass through the underlying
+      // intblock reader:
+      return freqReader;
+    }
+
+    @Override
+    public BulkPostingsEnum.BlockReader getPositionDeltasReader() {
+      // Maximize perf -- just pass through the underlying
+      // intblock reader (if payloads were not indexed):
+      return posReader;
+    }
+
+    private final JumpResult jumpResult = new JumpResult();
+
+    @Override
+    public JumpResult jump(int target, int curCount) throws IOException {
+
+      if (docFreq >= skipInterval) {
+
+        // There are enough docs in the posting to have
+        // skip data
+
+        if (skipper == null) {
+          // This enum has never done any skipping
+          skipper = new SepSkipListReader((IndexInput) skipIn.clone(),
+                                          freqIn,
+                                          docIn,
+                                          posIn,
+                                          maxSkipLevels, skipInterval);
+        }
+
+        if (!skipped) {
+          // We haven't yet skipped for this particular posting
+          skipper.init(skipOffset,
+                       docIndex,
+                       freqIndex,
+                       posIndex,
+                       0,
+                       docFreq,
+                       storePayloads);
+          skipper.setOmitTF(omitTF);
+          skipped = true;
+        }
+
+        final int newCount = skipper.skipTo(target); 
+        //System.out.println("  sep skip newCount=" + newCount + " vs count=" + curCount);
+
+        if (newCount > curCount) {
+
+          // Skipper did move -- seek all readers:
+          skipper.getDocIndex().seek(docReader);
+
+          if (freqReader != null) {
+            skipper.getFreqIndex().seek(freqReader);
+          }
+          if (posReader != null) {
+            skipper.getPosIndex().seek(posReader);
+          }
+
+          jumpResult.count = newCount;
+          jumpResult.docID = skipper.getDoc();
+          return jumpResult;
+        }
+      }
+      return null;
+    }        
+  }
 }

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java Tue Dec 14 17:18:00 2010
@@ -181,6 +181,7 @@ public final class SepPostingsWriterImpl
         posIndex.write(docOut, true);
         docOut.writeVLong(payloadStart);
       }
+      // nocommit -- only write if docFreq > skipInterval?
       docOut.writeVLong(skipOut.getFilePointer());
       firstDoc = false;
     }
@@ -199,6 +200,7 @@ public final class SepPostingsWriterImpl
     }
 
     lastDocID = docID;
+    //System.out.println("sepw: write docID=" + docID);
     docOut.write(delta);
     if (!omitTF) {
       freqOut.write(termDocFreq);

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java Tue Dec 14 17:18:00 2010
@@ -108,10 +108,10 @@ class SepSkipListReader extends MultiLev
 
     for(int i=0;i<maxNumberOfSkipLevels;i++) {
       docIndex[i].set(docBaseIndex);
-      if (freqIndex != null) {
+      if (freqIndex != null && freqBaseIndex != null) {
         freqIndex[i].set(freqBaseIndex);
       }
-      if (posBaseIndex != null) {
+      if (posBaseIndex != null && freqBaseIndex != null) {
         posIndex[i].set(posBaseIndex);
       }
     }

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java Tue Dec 14 17:18:00 2010
@@ -24,6 +24,7 @@ import org.apache.lucene.index.FieldsEnu
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.BulkPostingsEnum;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.store.IndexInput;
@@ -217,6 +218,17 @@ class SimpleTextFieldsReader extends Fie
     }
 
     @Override
+    public BulkPostingsEnum bulkPostings(BulkPostingsEnum reuse, boolean doFreq, boolean doPositions) throws IOException {
+      // instanceof is false for null, so it also covers the "nothing to reuse" case
+      final boolean reusable = reuse instanceof SimpleTextBulkPostingsEnum
+        && ((SimpleTextBulkPostingsEnum) reuse).canReuse(in, doFreq, doPositions);
+      final SimpleTextBulkPostingsEnum bulkPostingsEnum = reusable
+        ? (SimpleTextBulkPostingsEnum) reuse
+        : new SimpleTextBulkPostingsEnum(doFreq, doPositions);
+      return bulkPostingsEnum.reset(docsStart, omitTF);
+    }
+
+    @Override
     public Comparator<BytesRef> getComparator() {
       return BytesRef.getUTF8SortedAsUnicodeComparator();
     }
@@ -439,6 +451,189 @@ class SimpleTextFieldsReader extends Fie
     }
   }
 
+  private class SimpleTextBulkPostingsEnum extends BulkPostingsEnum {
+    private final IndexInput inStart;
+    private final IndexInput in;
+    private final LineCountReader docDeltasReader;
+    private final FreqsReader freqsReader;
+    private final LineCountReader positionDeltasReader;
+
+    public SimpleTextBulkPostingsEnum(boolean doFreq, boolean doPositions) {  // the flags decide which optional block readers are built
+      this.inStart = SimpleTextFieldsReader.this.in;  // kept so canReuse() can compare input identity
+      this.in = (IndexInput) this.inStart.clone();
+      docDeltasReader = new LineCountReader(DOC);  // the doc-delta reader is always created
+      if (doFreq) {
+        freqsReader = new FreqsReader();
+      } else {
+        freqsReader = null;  // getFreqsReader() then returns null
+      }
+
+      if (doPositions) {
+        positionDeltasReader = new LineCountReader(POS);
+      } else {
+        positionDeltasReader = null;  // getPositionDeltasReader() then returns null
+      }
+    }
+
+    public boolean canReuse(IndexInput in, boolean doFreq, boolean doPositions) {  // true only if built on the same input with exactly the requested readers
+      return in == inStart && (doFreq == (freqsReader != null)) && (doPositions == (positionDeltasReader != null));  // identity test against the input captured by the constructor
+    }
+
+    // reads docDeltas & positionDeltas: parses the integer after each line that starts with the given prefix and stores the difference from the previously parsed value
+    private class LineCountReader extends BlockReader {
+      private final BytesRef prefix;  // line prefix to match; the enclosing enum constructs this with DOC or POS
+      private final int[] buffer = new int[64];
+      private final IndexInput in;  // private clone, positioned independently of the other readers
+      private final BytesRef scratch = new BytesRef(10);
+      private int lastValue;  // last absolute value parsed; deltas are taken against it
+      private int limit;  // number of valid entries in buffer after the last fill()
+
+      public LineCountReader(BytesRef prefix) {
+        this.prefix = prefix;
+        this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
+      }
+
+      public void reset(long fp) throws IOException {  // zero the delta base, seek to fp and load the first block
+        lastValue = 0;
+        in.seek(fp);
+        fill();
+      }
+
+      @Override
+      public int[] getBuffer() {
+        return buffer;
+      }
+
+      @Override
+      public int offset() {
+        return 0;
+      }
+
+      @Override
+      public void setOffset(int offset) {
+        assert offset == 0;  // valid data always starts at index 0 (see offset())
+      }
+
+      @Override
+      public int end() {
+        return limit;
+      }
+
+      @Override
+      public int fill() throws IOException {  // returns the number of deltas stored; fewer than buffer.length if a TERM/FIELD/END line is reached
+        int upto = 0;
+        while(upto < buffer.length) {
+          readLine(in, scratch);
+          if (scratch.startsWith(TERM) || scratch.startsWith(FIELD) || scratch.equals(END)) {
+            break;  // stop at the next term, next field or end marker
+          } else if (scratch.startsWith(prefix)) {
+            final int value = Integer.parseInt(new String(scratch.bytes, scratch.offset+prefix.length, scratch.length-prefix.length));  // text after the prefix, parsed as a decimal int
+            buffer[upto++] = value - lastValue;
+            lastValue = value;  // NOTE(review): lastValue is zeroed only in reset(), so with prefix POS the first position of a doc is diffed against the previous doc's last position -- confirm this is intended
+          }
+        }
+        return limit = upto;
+      }
+    }
+
+    private class FreqsReader extends BlockReader {  // produces one freq per DOC line by counting the POS lines that follow it
+      private final int[] buffer = new int[64];
+      private final IndexInput in;  // private clone of the shared input
+      private final BytesRef scratch = new BytesRef(10);
+      private int limit;  // number of valid entries in buffer after the last fill()
+      private boolean omitTF;  // when true, fill() stores 1 for every doc instead of the POS count
+
+      public FreqsReader() {
+        this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
+      }
+
+      public void reset(long fp, boolean omitTF) throws IOException {  // seek to fp and load the first block
+        in.seek(fp);
+        this.omitTF = omitTF;
+        fill();
+      }
+
+      @Override
+      public int[] getBuffer() {
+        return buffer;
+      }
+
+      @Override
+      public int offset() {
+        return 0;
+      }
+
+      @Override
+      public void setOffset(int offset) {
+        assert offset == 0;  // valid data always starts at index 0 (see offset())
+      }
+
+      @Override
+      public int end() {
+        return limit;
+      }
+
+      @Override
+      public int fill() throws IOException {  // returns the number of freqs stored in buffer
+        int upto = 0;
+        int freq = -1;  // -1 until the first DOC line of this call has been seen
+        long lastFP = in.getFilePointer();
+        while(upto < buffer.length) {
+          lastFP = in.getFilePointer();  // start of the line about to be read
+          readLine(in, scratch);
+          if (scratch.startsWith(TERM) || scratch.startsWith(FIELD) || scratch.equals(END)) {
+            if (freq != -1) {
+              buffer[upto++] = omitTF ? 1 : freq;  // flush the last doc before stopping
+            }
+            break;
+          } else if (scratch.startsWith(DOC)) {
+            if (freq != -1) {
+              buffer[upto++] = omitTF ? 1: freq;  // a new doc begins, so the previous doc's count is complete
+            }
+            freq = 0;
+          } else if (scratch.startsWith(POS)) {
+            freq++;
+          }
+        }
+        in.seek(lastFP);  // rewind to the start of the last line read so the next fill() sees that line again
+        return limit = upto;
+      }
+    }
+  
+    public SimpleTextBulkPostingsEnum reset(long fp, boolean omitTF) throws IOException {  // points every reader that exists at fp; each reader's reset() also loads its first block
+
+      docDeltasReader.reset(fp);
+    
+      if (freqsReader != null) {
+        freqsReader.reset(fp, omitTF);  // same start offset: the freq reader scans the same lines through its own clone
+      }
+      if (positionDeltasReader != null) {
+        positionDeltasReader.reset(fp);
+      }
+      return this;  // returned so bulkPostings() can hand it straight back to the caller
+    }
+
+    @Override
+    public BlockReader getDocDeltasReader() {
+      return docDeltasReader;  // always non-null: created unconditionally in the constructor
+    }
+
+    @Override
+    public BlockReader getPositionDeltasReader() {
+      return positionDeltasReader;  // null when the enum was constructed with doPositions == false
+    }
+
+    @Override
+    public BlockReader getFreqsReader() {
+      return freqsReader;  // null when the enum was constructed with doFreq == false
+    }
+
+    @Override
+    public JumpResult jump(int target, int curCount) {
+      return null;  // both arguments are ignored; this implementation never jumps
+    }
+  }
+
   private class SimpleTextTerms extends Terms {
     private final String field;
     private final long termsStart;

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java Tue Dec 14 17:18:00 2010
@@ -24,6 +24,7 @@ import org.apache.lucene.store.Directory
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.BulkPostingsEnum;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.codecs.PostingsReaderBase;
@@ -170,6 +171,17 @@ public class StandardPostingsReader exte
   }
 
   @Override
+  public BulkPostingsEnum bulkPostings(FieldInfo fieldInfo, TermState termState, BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException {
+    // instanceof rejects null too, so one test covers both "no enum" and "foreign enum"
+    final boolean reusable = reuse instanceof SegmentBulkPostingsEnum
+      && ((SegmentBulkPostingsEnum) reuse).canReuse(fieldInfo, freqIn, doFreqs, doPositions);
+    final SegmentBulkPostingsEnum postingsEnum = reusable
+      ? (SegmentBulkPostingsEnum) reuse
+      : new SegmentBulkPostingsEnum(fieldInfo.omitTermFreqAndPositions, doFreqs, doPositions);
+    return postingsEnum.reset(fieldInfo, (DocTermState) termState);
+  }
+
+  @Override
   public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
     if (fieldInfo.omitTermFreqAndPositions) {
       return null;
@@ -248,6 +260,7 @@ public class StandardPostingsReader exte
       // cases
       freqIn.seek(termState.freqOffset);
       limit = termState.docFreq;
+      assert limit > 0;
       ord = 0;
       doc = 0;
 
@@ -420,6 +433,7 @@ public class StandardPostingsReader exte
       lazyProxPointer = termState.proxOffset;
 
       limit = termState.docFreq;
+      assert limit > 0;
       ord = 0;
       doc = 0;
       position = 0;
@@ -796,4 +810,328 @@ public class StandardPostingsReader exte
       return payloadPending && payloadLength > 0;
     }
   }
+  
+  static final int BULK_BUFFER_SIZE = 64;  // entries per block for the bulk readers; also read by StandardPostingsWriter.close() to size its end-of-file padding
+  
+  // Bulk postings API
+  private final class SegmentBulkPostingsEnum extends BulkPostingsEnum {
+    private final IndexInput freqIn;
+    private final IndexInput proxIn;
+  
+    final IndexInput startFreqIn;
+    private final boolean omitTF;
+  
+    boolean storePayloads;                        // does current field store payloads?
+  
+    int ord;                                      // how many docs we've read
+    int docFreq;
+  
+    long freqOffset;
+    long proxOffset;
+    int skipOffset;
+  
+    boolean skipped;
+    DefaultSkipListReader skipper;
+    private int payloadLength;
+  
+    private final DocDeltasReader docDeltasReader;
+    private final FreqsReader freqsReader;
+    private final PositionsReader positionDeltasReader;
+  
+    private boolean docsPending, freqsPending;
+  
+    public SegmentBulkPostingsEnum(boolean omitTF, boolean doFreqs, boolean doPositions) throws IOException {  // builds only the readers that were requested and that the field can supply
+      //System.out.println("bulk init");
+      startFreqIn = StandardPostingsReader.this.freqIn;  // the shared original, kept for canReuse()
+      this.freqIn = (IndexInput) StandardPostingsReader.this.freqIn.clone();  // private clone used for all reads
+      this.omitTF = omitTF;
+  
+      docDeltasReader = new DocDeltasReader();
+      if (doFreqs && !omitTF) {  // no freqs reader when the field omits term freqs
+        freqsReader = new FreqsReader();
+      } else {
+        freqsReader = null;
+      }
+  
+      if (doPositions && !omitTF) {  // positions need the prox input and are likewise skipped when omitTF is set
+        this.proxIn = (IndexInput) StandardPostingsReader.this.proxIn.clone();
+        positionDeltasReader = new PositionsReader();
+      } else {
+        this.proxIn = null;
+        positionDeltasReader = null;
+      }
+    }
+  
+    public boolean canReuse(FieldInfo fieldInfo, IndexInput freqin, boolean doFreqs, boolean doPositions) {
+      return freqIn == startFreqIn &&  // NOTE(review): the freqin argument is never read; this compares the enum's own clone (see constructor) with the original -- was freqin == startFreqIn intended?
+        (!doFreqs || freqsReader == null) &&  // NOTE(review): accepts reuse when freqs are requested but no FreqsReader exists -- confirm the null test is not inverted
+        (!doPositions || positionDeltasReader == null) &&  // NOTE(review): same question for positions
+        (omitTF == fieldInfo.omitTermFreqAndPositions);  // the enum must have been built for the same omitTF setting
+    }
+  
+    final void read() throws IOException {  // decodes BULK_BUFFER_SIZE entries from freqIn into the doc-delta buffer (and the freq buffer when a FreqsReader exists); always reads a full block
+      if (freqsReader == null) {
+        // Consumer only wants doc deltas
+        assert !docsPending;
+        if (omitTF) {
+          // Index only stores doc deltas
+          for(int i=0;i<BULK_BUFFER_SIZE;i++) {
+            docDeltasReader.buffer[i] = freqIn.readVInt();
+          }
+        } else {
+          // Index stores doc deltas & freq
+          for(int i=0;i<BULK_BUFFER_SIZE;i++) {
+            final int code = freqIn.readVInt();
+            docDeltasReader.buffer[i] = code >>> 1;  // upper bits hold the doc delta
+            if ((code & 1) == 0) {  // low bit clear: a separate freq VInt follows
+              freqIn.readVInt();  // freq not wanted: read and discard to stay aligned
+            }
+          }
+        }
+        ord += BULK_BUFFER_SIZE;
+        docsPending = true;  // block is ready for DocDeltasReader.fill()
+      } else {
+        // Consumer wants both
+        assert !docsPending;
+        assert !freqsPending;
+        for(int i=0;i<BULK_BUFFER_SIZE;i++) {
+          final int code = freqIn.readVInt();
+          docDeltasReader.buffer[i] = code >>> 1;  // upper bits hold the doc delta
+          if ((code & 1) == 0) {
+            freqsReader.buffer[i] = freqIn.readVInt();  // low bit clear: explicit freq follows
+          } else {
+            freqsReader.buffer[i] = 1;  // low bit set: freq is 1 and is not stored separately
+          }
+        }
+        ord += BULK_BUFFER_SIZE;
+        docsPending = true;  // one decode serves both readers; each fill() clears its own flag
+        freqsPending = true;
+      }
+    }
+  
+    private class DocDeltasReader extends BulkPostingsEnum.BlockReader {  // exposes the doc-delta block that read() decodes
+      final int[] buffer = new int[BULK_BUFFER_SIZE];  // written directly by read()
+      int limit;  // value returned by end(); jump() sets it to 0
+      int offset;
+  
+      @Override
+      public int[] getBuffer() {
+        return buffer;
+      }
+  
+      @Override
+      public int end() {
+        return limit;
+      }
+  
+      @Override
+      public int fill() throws IOException {  // always reports a full block of BULK_BUFFER_SIZE entries
+        if (!docsPending) {
+          read();  // no decoded block waiting, so decode the next one
+        }
+        docsPending = false;  // the pending block is now handed to the caller
+        limit = BULK_BUFFER_SIZE;
+        offset = 0;
+        //System.out.println("spr: doc deltas read limit=" + limit);
+        return BULK_BUFFER_SIZE;
+      }
+  
+      @Override
+      public int offset() {
+        return offset;
+      }
+  
+      @Override
+      public void setOffset(int offset) {
+        this.offset = offset;
+      }
+    }
+  
+    private class FreqsReader extends BulkPostingsEnum.BlockReader {  // exposes the freq block that read() decodes alongside the doc deltas
+      final int[] buffer = new int[BULK_BUFFER_SIZE];  // written directly by read()
+      int limit;  // value returned by end(); jump() sets it to 0
+  
+      @Override
+      public int[] getBuffer() {
+        return buffer;
+      }
+  
+      @Override
+      public int end() {
+        return limit;
+      }
+  
+      @Override
+      public int fill() throws IOException {  // always reports a full block of BULK_BUFFER_SIZE entries
+        if (!freqsPending) {
+          read();  // no decoded block waiting, so decode the next one (this also fills the doc-delta buffer)
+        }
+        freqsPending = false;
+        limit = BULK_BUFFER_SIZE;
+        return BULK_BUFFER_SIZE;
+      }
+  
+      @Override
+      public int offset() {
+        return 0;
+      }
+  
+      @Override
+      public void setOffset(int offset) {
+        throw new UnsupportedOperationException();  // unlike DocDeltasReader, no offset is tracked here
+      }
+    }
+  
+    private class PositionsReader extends BulkPostingsEnum.BlockReader {  // decodes blocks of position values from proxIn, skipping over any payload bytes
+      final int[] buffer = new int[BULK_BUFFER_SIZE];
+      int limit;  // value returned by end(); jump() sets it to 0
+  
+      @Override
+      public int[] getBuffer() {
+        return buffer;
+      }
+  
+      @Override
+      public int end() {
+        return limit;
+      }
+  
+      @Override
+      public int fill() throws IOException {  // always reads and reports a full block of BULK_BUFFER_SIZE entries
+        // nocommit -- must flush prx file w/ extra 127 0
+        // positions -- index change!!
+        if (storePayloads) {
+          for(int i=0;i<BULK_BUFFER_SIZE;i++) {
+            final int code = proxIn.readVInt();
+            buffer[i] = code >>> 1;  // upper bits hold the position value
+            if ((code & 1) != 0) {  // low bit set: a new payload length follows
+              payloadLength = proxIn.readVInt();
+            }
+            if (payloadLength != 0) {  // payloadLength is a field of the enclosing enum, so it carries over until changed
+              // skip payload
+              proxIn.seek(proxIn.getFilePointer()+payloadLength);
+            }
+          }
+        } else {
+          for(int i=0;i<BULK_BUFFER_SIZE;i++) {
+            buffer[i] = proxIn.readVInt();  // no payload flag: the VInt is stored as-is
+          }
+        }
+        limit = BULK_BUFFER_SIZE;
+        return BULK_BUFFER_SIZE;
+      }
+  
+      @Override
+      public int offset() {
+        return 0;
+      }
+  
+      @Override
+      public void setOffset(int offset) {
+        throw new UnsupportedOperationException();  // no offset is tracked for positions
+      }
+    }
+    
+    @Override
+    public BlockReader getDocDeltasReader() {
+      return docDeltasReader;  // always non-null: created unconditionally in the constructor
+    }
+      
+    @Override
+    public BlockReader getFreqsReader() {
+      return freqsReader;  // null unless constructed with doFreqs == true and omitTF == false
+    }
+  
+    @Override
+    public BlockReader getPositionDeltasReader() {
+      return positionDeltasReader;  // null unless constructed with doPositions == true and omitTF == false
+    }
+  
+    public SegmentBulkPostingsEnum reset(FieldInfo fieldInfo, DocTermState termState) throws IOException {  // positions this enum at the start of the given term's postings and returns it
+      storePayloads = fieldInfo.storePayloads;
+      freqOffset = termState.freqOffset;
+      freqIn.seek(freqOffset);
+  
+      // TODO: for full enum case (eg segment merging) this
+      // seek is unnecessary; maybe we can avoid in such
+      // cases
+      if (positionDeltasReader != null) {
+        // nocommit -- how come this is a live seek but
+        // frq/doc is not?
+        proxOffset = termState.proxOffset;
+        proxIn.seek(proxOffset);
+      }
+  
+      skipOffset = termState.skipOffset;
+      docFreq = termState.docFreq;
+      assert docFreq > 0;
+  
+      ord = 0;  // NOTE(review): docsPending/freqsPending and the readers' limit/offset are not cleared here -- confirm that is safe if an enum is ever reused
+      skipped = false;  // skip data for the new term is loaded lazily by jump()
+  
+      return this;
+    }
+  
+    private final JumpResult jumpResult = new JumpResult();
+  
+    @Override
+    public JumpResult jump(int target, int curCount) throws IOException {  // returns the shared jumpResult instance if the skipper advanced past ord, otherwise null
+  
+      // TODO: jump right to next() if target is < X away
+      // from where we are now?
+  
+      if (skipOffset > 0) {
+  
+        // There are enough docs in the posting to have
+        // skip data
+  
+        if (skipper == null) {
+          // This is the first time this enum has ever been used for skipping -- do lazy init
+          skipper = new DefaultSkipListReader((IndexInput) freqIn.clone(), maxSkipLevels, skipInterval);
+        }
+  
+        if (!skipped) {
+  
+          // This is the first time this posting has
+          // skipped since reset() was called, so now we
+          // load the skip data for this posting
+          skipper.init(freqOffset + skipOffset,
+                       freqOffset, proxOffset,
+                       docFreq, storePayloads);
+  
+          skipped = true;
+        }
+  
+        final int newOrd = skipper.skipTo(target);  // NOTE(review): presumably the count of docs skipped over, since it is compared with and assigned to ord -- confirm against DefaultSkipListReader
+  
+        // nocommit rename ord -> count
+        assert curCount == ord: "curCount=" + curCount + " ord=" + ord;
+  
+        if (newOrd > ord) {
+          // Skipper moved
+          //System.out.println("newOrd=" + newOrd + " vs ord=" + ord + " doc=" + skipper.getDoc());
+  
+          freqIn.seek(skipper.getFreqPointer());
+          docDeltasReader.limit = 0;  // end() now reports 0 for the doc-delta reader; NOTE(review): docsPending/freqsPending are not cleared here -- confirm a block decoded before the jump cannot still be handed out by fill()
+  
+          if (freqsReader != null) {
+            freqsReader.limit = 0;
+          }
+  
+          if (positionDeltasReader != null) {
+            positionDeltasReader.limit = 0;
+            proxIn.seek(skipper.getProxPointer());
+          }
+  
+          jumpResult.count = ord = newOrd;
+          jumpResult.docID = skipper.getDoc();
+  
+          return jumpResult;  // same object on every successful jump
+        }
+      }
+  
+      // no jump occurred
+      return null;
+    }
+  }
 }

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java Tue Dec 14 17:18:00 2010
@@ -224,6 +224,16 @@ public final class StandardPostingsWrite
 
   @Override
   public void close() throws IOException {
+
+    // Readers read whole blocks at once, so we have to
+    // flush final block out w/ unused values:
+    for(int i=0;i<StandardPostingsReader.BULK_BUFFER_SIZE-1;i++) {
+      freqOut.writeVInt(1);
+      if (proxOut != null) {
+        proxOut.writeVInt(0);
+      }
+    }
+ 
     try {
       freqOut.close();
     } finally {

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer.java Tue Dec 14 17:18:00 2010
@@ -239,6 +239,9 @@ final class BooleanScorer extends Scorer
     do {
       bucketTable.first = null;
       
+      // used only by assert:
+      int count = 0;
+
       while (current != null) {         // more queued 
 
         // check prohibited & required
@@ -264,6 +267,8 @@ final class BooleanScorer extends Scorer
           }
         }
         
+        assert count++ < BucketTable.SIZE;
+        assert current != current.next;
         current = current.next;         // pop the queue
       }
       

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java Tue Dec 14 17:18:00 2010
@@ -142,6 +142,11 @@ public class ConstantScoreQuery extends 
     public int nextDoc() throws IOException {
       return docIdSetIterator.nextDoc();
     }
+
+    @Override
+    public String toString() {
+      return "ConstantScorer(" + filter + ")";  // debugging aid: embeds the filter's own string form
+    }
     
     @Override
     public int docID() {

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java Tue Dec 14 17:18:00 2010
@@ -23,6 +23,7 @@ import java.util.Comparator;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.BulkPostingsEnum;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Bits;
@@ -150,6 +151,12 @@ public abstract class FilteredTermsEnum 
   public DocsEnum docs(Bits bits, DocsEnum reuse) throws IOException {
     return tenum.docs(bits, reuse);
   }
+
+  @Override
+  public BulkPostingsEnum bulkPostings(BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException {
+    assert tenum != null;  // the call below dereferences tenum
+    return tenum.bulkPostings(reuse, doFreqs, doPositions);  // straight delegation, arguments passed through unchanged
+  }
     
   @Override
   public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse) throws IOException {

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java Tue Dec 14 17:18:00 2010
@@ -23,7 +23,7 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.BulkPostingsEnum;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.util.OpenBitSet;
 import org.apache.lucene.util.Bits;
@@ -125,26 +125,32 @@ public class MultiTermQueryWrapperFilter
       final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
       int termCount = 0;
       final Bits delDocs = MultiFields.getDeletedDocs(reader);
-      DocsEnum docsEnum = null;
+      BulkPostingsEnum postingsEnum = null;
       do {
         termCount++;
-        // System.out.println("  iter termCount=" + termCount + " term=" +
-        // enumerator.term().toBytesString());
-        docsEnum = termsEnum.docs(delDocs, docsEnum);
-        final DocsEnum.BulkReadResult result = docsEnum.getBulkResult();
-        while (true) {
-          final int count = docsEnum.read();
-          if (count != 0) {
-            final int[] docs = result.docs.ints;
-            for (int i = 0; i < count; i++) {
-              bitSet.set(docs[i]);
-            }
-          } else {
-            break;
+        postingsEnum = termsEnum.bulkPostings(postingsEnum, false, false);
+        final int docFreq = termsEnum.docFreq();
+        final BulkPostingsEnum.BlockReader docDeltasReader = postingsEnum.getDocDeltasReader();
+        final int[] docDeltas = docDeltasReader.getBuffer();
+        int offset = docDeltasReader.offset();
+        int limit = docDeltasReader.end();
+        if (offset >= limit) {
+          limit = docDeltasReader.fill();
+        }
+        int count = 0;
+        int doc = 0;
+        while (count < docFreq) {
+          if (offset >= limit) {
+            offset = 0;
+            limit = docDeltasReader.fill();
+          }
+          doc += docDeltas[offset++];
+          count++;
+          if (delDocs == null || !delDocs.get(doc)) {
+            bitSet.set(doc);
           }
         }
       } while (termsEnum.next() != null);
-      // System.out.println("  done termCount=" + termCount);
 
       query.incTotalNumberOfTerms(termCount);
       return bitSet;

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermQuery.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermQuery.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermQuery.java Tue Dec 14 17:18:00 2010
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.Set;
 
 import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.BulkPostingsEnum;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.Explanation.IDFExplanation;
@@ -76,15 +77,18 @@ public class TermQuery extends Query {
 
     @Override
     public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
-      DocsEnum docs = reader.termDocsEnum(reader.getDeletedDocs(),
-                                          term.field(),
-                                          term.bytes());
-
+      assert reader.getSequentialSubReaders() == null;
+      BulkPostingsEnum docs = reader.bulkTermPostingsEnum(term.field(),
+                                                          term.bytes(),
+                                                          true,
+                                                          false);
       if (docs == null) {
         return null;
       }
 
-      return new TermScorer(this, docs, similarity, reader.norms(term.field()));
+      // nocommit: we need this docfreq from TermState, MTQ knows it... but tosses it away.
+      return new TermScorer(this, docs, reader.docFreq(term.field(), term.bytes()),
+                            reader.getDeletedDocs(), similarity, reader.norms(term.field()));
     }
 
     @Override
@@ -124,10 +128,10 @@ public class TermQuery extends Query {
       int tf = 0;
       DocsEnum docs = reader.termDocsEnum(reader.getDeletedDocs(), term.field(), term.bytes());
       if (docs != null) {
-          int newDoc = docs.advance(doc);
-          if (newDoc == doc) {
-            tf = docs.freq();
-          }
+        int newDoc = docs.advance(doc);
+        if (newDoc == doc) {
+          tf = docs.freq();
+        }
         tfExplanation.setValue(similarity.tf(tf));
         tfExplanation.setDescription("tf(termFreq("+term+")="+tf+")");
       } else {

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermScorer.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermScorer.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermScorer.java Tue Dec 14 17:18:00 2010
@@ -19,26 +19,33 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.index.BulkPostingsEnum;
+import org.apache.lucene.util.Bits;
 
 /** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
  */
 final class TermScorer extends Scorer {
-  private DocsEnum docsEnum;
+  private BulkPostingsEnum docsEnum;
   private byte[] norms;
   private float weightValue;
-  private int doc = -1;
-  private int freq;
+  private int doc;
 
-  private int pointer;
-  private int pointerMax;
+  private final int[] docDeltas;
+  private int docPointer;
+  private int docPointerMax;
+  private boolean first = true;
+
+  private final int[] freqs;
+  private int freqPointer;
+  private int freqPointerMax;
 
   private static final int SCORE_CACHE_SIZE = 32;
   private float[] scoreCache = new float[SCORE_CACHE_SIZE];
-  private int[] docs;
-  private int[] freqs;
-  private final DocsEnum.BulkReadResult bulkResult;
+  private final BulkPostingsEnum.BlockReader freqsReader;
+  private final BulkPostingsEnum.BlockReader docDeltasReader;
+  private final Bits skipDocs;
+  private final int docFreq;
+  private int count;
 
   /**
    * Construct a <code>TermScorer</code>.
@@ -53,13 +60,36 @@ final class TermScorer extends Scorer {
    * @param norms
    *          The field norms of the document fields for the <code>Term</code>.
    */
-  TermScorer(Weight weight, DocsEnum td, Similarity similarity, byte[] norms) {
+  TermScorer(Weight weight, BulkPostingsEnum td, int docFreq, Bits skipDocs, Similarity similarity, byte[] norms) throws IOException {
     super(similarity, weight);
     
     this.docsEnum = td;
+    this.docFreq = docFreq;
+    docDeltasReader = td.getDocDeltasReader();
+    docDeltas = docDeltasReader.getBuffer();
+    docPointerMax = docDeltasReader.end();
+    docPointer = docDeltasReader.offset();
+    if (docPointer >= docPointerMax) {
+      docPointerMax = docDeltasReader.fill();
+    }
+    docPointer--;
+
+    freqsReader = td.getFreqsReader();
+    if (freqsReader != null) {
+      freqs = freqsReader.getBuffer();
+      freqPointerMax = freqsReader.end();
+      freqPointer = freqsReader.offset();
+      if (freqPointer >= freqPointerMax) {
+        freqPointerMax = freqsReader.fill();
+      }
+      freqPointer--;
+    } else {
+      freqs = null;
+    }
+
+    this.skipDocs = skipDocs;
     this.norms = norms;
     this.weightValue = weight.getValue();
-    bulkResult = td.getBulkResult();
 
     for (int i = 0; i < SCORE_CACHE_SIZE; i++)
       scoreCache[i] = getSimilarity().tf(i) * weightValue;
@@ -70,41 +100,73 @@ final class TermScorer extends Scorer {
     score(c, Integer.MAX_VALUE, nextDoc());
   }
 
-  private final void refillBuffer() throws IOException {
-    pointerMax = docsEnum.read();  // refill
-    docs = bulkResult.docs.ints;
-    freqs = bulkResult.freqs.ints;
-  }
-
   // firstDocID is ignored since nextDoc() sets 'doc'
   @Override
   protected boolean score(Collector c, int end, int firstDocID) throws IOException {
     c.setScorer(this);
+    //System.out.println("ts.collect firstdocID=" + firstDocID + " term=" + term + " end=" + end + " doc=" + doc);
+    // nocommit -- this can leave scorer on a deleted doc...
     while (doc < end) {                           // for docs in window
-      c.collect(doc);                      // collect score
-      if (++pointer >= pointerMax) {
-        refillBuffer();
-        if (pointerMax != 0) {
-          pointer = 0;
-        } else {
-          doc = NO_MORE_DOCS;                // set to sentinel value
-          return false;
+      if (skipDocs == null || !skipDocs.get(doc)) {
+        //System.out.println("ts.collect doc=" + doc + " skipDocs=" + skipDocs + " count=" + count + " vs dF=" + docFreq);
+        c.collect(doc);                      // collect
+      }
+      if (count == docFreq) {
+        doc = NO_MORE_DOCS;
+        return false;
+      }
+      count++;
+      docPointer++;
+
+      //System.out.println("dp=" + docPointer + " dpMax=" + docPointerMax + " count=" + count + " countMax=" + docFreq);
+
+      if (docPointer >= docPointerMax) {
+        docPointerMax = docDeltasReader.fill();
+        //System.out.println("    refill!  dpMax=" + docPointerMax + " reader=" + docDeltasReader);
+        assert docPointerMax != 0;
+        docPointer = 0;
+
+        if (freqsReader != null) {
+          freqPointer++;
+          // NOTE: this code is intentionally dup'd
+          // (specialized) w/ the else clause, for better CPU
+          // branch prediction (assuming compiler doesn't
+          // de-dup): for codecs that always bulk read same
+          // number of docDeltas & freqs (standard, for,
+          // pfor), this if will always be true.  Other codecs
+          // (simple9/16) will not be aligned:
+          if (freqPointer >= freqPointerMax) {
+            freqPointerMax = freqsReader.fill();
+            assert freqPointerMax != 0;
+            freqPointer = 0;
+          }
         }
-      } 
-      doc = docs[pointer];
-      freq = freqs[pointer];
+      } else if (freqsReader != null) {
+        freqPointer++;
+        if (freqPointer >= freqPointerMax) {
+          freqPointerMax = freqsReader.fill();
+          assert freqPointerMax != 0;
+          freqPointer = 0;
+        }
+      }
+
+      doc += docDeltas[docPointer];
     }
     return true;
   }
 
   @Override
   public int docID() {
-    return doc;
+    return first ? -1 : doc;
   }
 
   @Override
   public float freq() {
-    return freq;
+    if (freqsReader != null) {
+      return freqs[freqPointer];
+    } else {
+      return 1.0f;
+    }
   }
 
   /**
@@ -116,23 +178,65 @@ final class TermScorer extends Scorer {
    */
   @Override
   public int nextDoc() throws IOException {
-    pointer++;
-    if (pointer >= pointerMax) {
-      refillBuffer();
-      if (pointerMax != 0) {
-        pointer = 0;
+    //System.out.println("ts.nextDoc " + this + " count=" + count + " vs docFreq=" + docFreq);
+    while(count < docFreq) {
+      docPointer++;
+      if (docPointer >= docPointerMax) {
+        //System.out.println("ts.nd refill docs");
+        docPointerMax = docDeltasReader.fill();
+        assert docPointerMax != 0;
+        docPointer = 0;
+        if (freqsReader != null) {
+          // NOTE: this code is intentionally dup'd
+          // (specialized) w/ the else clause, for better CPU
+          // branch prediction (assuming compiler doesn't
+          // de-dup): for codecs that always bulk read same
+          // number of docDeltas & freqs (standard, for,
+          // pfor), this if will always be true.  Other codecs
+          // (simple9/16) will not be aligned:
+          freqPointer++;
+          if (freqPointer >= freqPointerMax) {
+            //System.out.println("ts.nd refill freqs");
+            freqPointerMax = freqsReader.fill();
+            assert freqPointerMax != 0;
+            freqPointer = 0;
+          }
+        }
       } else {
-        return doc = NO_MORE_DOCS;
+        if (freqsReader != null) {
+          freqPointer++;
+          if (freqPointer >= freqPointerMax) {
+            //System.out.println("ts.nd refill freqs");
+            freqPointerMax = freqsReader.fill();
+            assert freqPointerMax != 0;
+            freqPointer = 0;
+          }
+        }
       }
-    } 
-    doc = docs[pointer];
-    freq = freqs[pointer];
-    assert doc != NO_MORE_DOCS;
-    return doc;
+      count++;
+      doc += docDeltas[docPointer];
+      first = false;
+      assert doc >= 0 && (skipDocs == null || doc < skipDocs.length()) && doc != NO_MORE_DOCS: "doc=" + doc + " skipDocs=" + skipDocs + " skipDocs.length=" + (skipDocs==null? "n/a" : skipDocs.length());
+      if (skipDocs == null || !skipDocs.get(doc)) {
+        //System.out.println("  ret doc=" + doc + " freq=" + freq());
+        return doc;
+      }
+    }
+
+    //System.out.println("  end");
+    return doc = NO_MORE_DOCS;
   }
   
   @Override
   public float score() {
+    assert !first;
+    final int freq;
+    if (freqsReader == null) {
+      freq = 1;
+    } else {
+      freq = freqs[freqPointer];
+    }
+    assert freq > 0;
     assert doc != NO_MORE_DOCS;
     float raw =                                   // compute tf(f)*weight
       freq < SCORE_CACHE_SIZE                        // check cache
@@ -153,24 +257,100 @@ final class TermScorer extends Scorer {
    */
   @Override
   public int advance(int target) throws IOException {
-    // first scan in cache
-    for (pointer++; pointer < pointerMax; pointer++) {
-      if (docs[pointer] >= target) {
-        freq = freqs[pointer];
-        return doc = docs[pointer];
+
+    // nocommit: should we, here, optimize .advance(target that isn't
+    // too far away) into scan?  seems like simple win?
+
+    // first scan current doc deltas block
+    for (docPointer++; docPointer < docPointerMax && count < docFreq; docPointer++) {
+      assert first || docDeltas[docPointer] > 0;
+      doc += docDeltas[docPointer];
+      first = false;
+      count++;
+      if (freqsReader != null && ++freqPointer >= freqPointerMax) {
+        freqPointerMax = freqsReader.fill();
+        assert freqPointerMax != 0;
+        freqPointer = 0;
+      } 
+      if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) {
+        return doc;
       }
     }
 
-    // not found in readahead cache, seek underlying stream
-    int newDoc = docsEnum.advance(target);
-    //System.out.println("ts.advance docsEnum=" + docsEnum);
-    if (newDoc != NO_MORE_DOCS) {
-      doc = newDoc;
-      freq = docsEnum.freq();
+    if (count == docFreq) {
+      return doc = NO_MORE_DOCS;
+    }
+
+    // not found in current block, seek underlying stream
+    BulkPostingsEnum.JumpResult jumpResult = docsEnum.jump(target, count);
+    if (jumpResult != null) {
+      count = jumpResult.count;
+      doc = jumpResult.docID;
+      first = false;
+      docPointer = docDeltasReader.offset();
+      docPointerMax = docDeltasReader.end();
+      if (docPointer >= docPointerMax) {
+        docPointerMax = docDeltasReader.fill();
+      }
+      docPointer--;
+      if (freqsReader != null) {
+        freqPointer = freqsReader.offset();
+        freqPointerMax = freqsReader.end();
+        if (freqPointer >= freqPointerMax) {
+          freqPointerMax = freqsReader.fill();
+        }
+        freqPointer--;
+      }
     } else {
-      doc = NO_MORE_DOCS;
+      // seek did not jump -- just fill next buffer
+      docPointerMax = docDeltasReader.fill();
+      if (docPointerMax != 0) {
+        docPointer = 0;
+        assert first || docDeltas[0] > 0;
+        doc += docDeltas[0];
+        count++;
+        first = false;
+      } else {
+        return doc = NO_MORE_DOCS;
+      }
+      if (freqsReader != null && ++freqPointer >= freqPointerMax) {
+        freqPointerMax = freqsReader.fill();
+        assert freqPointerMax != 0;
+        freqPointer = 0;
+      } 
+    }
+
+    // now scan
+    while(true) {
+      assert doc >= 0 && doc != NO_MORE_DOCS;
+      if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) {
+        return doc;
+      }
+
+      if (count >= docFreq) {
+        break;
+      }
+
+      if (++docPointer >= docPointerMax) {
+        docPointerMax = docDeltasReader.fill();
+        if (docPointerMax != 0) {
+          docPointer = 0;
+        } else {
+          return doc = NO_MORE_DOCS;
+        }
+      }
+
+      if (freqsReader != null && ++freqPointer >= freqPointerMax) {
+        freqPointerMax = freqsReader.fill();
+        assert freqPointerMax != 0;
+        freqPointer = 0;
+      } 
+
+      assert first || docDeltas[docPointer] > 0;
+      doc += docDeltas[docPointer];
+      count++;
     }
-    return doc;
+    return doc = NO_MORE_DOCS;
   }
 
   /** Returns a string representation of this <code>TermScorer</code>. */

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/BitUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/BitUtil.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/BitUtil.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/BitUtil.java Tue Dec 14 17:18:00 2010
@@ -814,4 +814,25 @@ public final class BitUtil {
     return v;
   }
 
+  /** Returns the smallest non-negative p such that the given value v < (2**(p+1)).
+   * This differs from (63 - java.lang.Long.numberOfLeadingZeros(v))
+   * only for non-positive v, for which both loops are skipped and 0 is returned.
+   */
+  public static int logNextHigherPowerOfTwo(long v) {
+    long vinput = v; // only for assertions below.
+    int p = 0;
+    while (v >= (1 << 8)) { // coarse pass: strip a whole byte per iteration
+      v >>= 8;
+      p += 8;
+    }
+    while (v >= (1 << 1)) { // fine pass: strip the remaining bits one at a time
+      v >>= 1;
+      p++;
+    }
+    assert (p <= 62) : p; // upper bound on the result for any long input
+    assert (p == 62) || (vinput < (1L << (p + 1))) : "p " + p + ", vinput " + vinput; // p == 62 is excused from this check
+    assert (p == 0) || (vinput >= (1L << p)) : "p " + p + ", vinput " + vinput; // p is not larger than necessary
+    assert (vinput <= 0) || (p == (63 - Long.numberOfLeadingZeros(vinput))) : "p " + p + ", vinput " + vinput; // agrees with the JDK formula for positive input
+    return p;
+  }
 }

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java Tue Dec 14 17:18:00 2010
@@ -342,6 +342,114 @@ public class TestExternalCodecs extends 
       public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) {
         return new RAMDocsAndPositionsEnum(ramField.termToDocs.get(current), skipDocs);
       }
+
+      @Override
+      public BulkPostingsEnum bulkPostings(BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException {
+        return new RAMBulkPostingsEnum(ramField.termToDocs.get(current)); // always a fresh enum; reuse, doFreqs and doPositions are not consulted
+      }
+    }
+
+    static final int BULK_BUFFER_SIZE = 64;
+  
+    // Bulk postings API: BulkPostingsEnum built from the in-memory postings of one RAMTerm
+    private static class RAMBulkPostingsEnum extends BulkPostingsEnum {
+      private final RAMTerm ramTerm;
+      private final BlockReader docDeltasReader;
+      private final BlockReader freqsReader;
+      private final BlockReader posDeltasReader;
+
+      public RAMBulkPostingsEnum(RAMTerm ramTerm) throws IOException {
+        this.ramTerm = ramTerm;
+
+        int[] docDeltas = new int[10];
+        int[] freqs = new int[10];
+        int[] posDeltas = new int[10];
+        int docUpto = 0;
+        int posUpto = 0;
+        int lastDocID = 0;
+        for(RAMDoc doc : ramTerm.docs) {
+          if (docDeltas.length == docUpto) { // docDeltas and freqs are grown under the same check
+            docDeltas = ArrayUtil.grow(docDeltas, 1+docUpto);
+            freqs = ArrayUtil.grow(freqs, 1+docUpto);
+          }
+          docDeltas[docUpto] = doc.docID - lastDocID; // gap from the previous doc; the first entry is the docID itself
+          freqs[docUpto] = doc.positions.length; // freq is the number of recorded positions
+          docUpto++;
+          lastDocID = doc.docID;
+          int lastPos = 0; // position deltas restart from 0 for every document
+          for(int pos : doc.positions) {
+            if (posDeltas.length == posUpto) {
+              posDeltas = ArrayUtil.grow(posDeltas, 1+posUpto);
+            }
+            posDeltas[posUpto++] = pos - lastPos;
+            lastPos = pos;
+          }
+        }
+        docDeltasReader = new SimpleBlockReader(docDeltas, docUpto); // docUpto/posUpto are the valid lengths; the arrays may be longer
+        freqsReader = new SimpleBlockReader(freqs, docUpto);
+        posDeltasReader = new SimpleBlockReader(posDeltas, posUpto);
+      }
+
+      @Override
+      public BlockReader getDocDeltasReader() {
+        return docDeltasReader;
+      }
+
+      @Override
+      public BlockReader getFreqsReader() {
+        return freqsReader;
+      }
+
+      @Override
+      public BlockReader getPositionDeltasReader() {
+        return posDeltasReader;
+      }
+
+      @Override
+      public JumpResult jump(int target, int curCount) {
+        return null; // this enum never jumps; both arguments are ignored
+      }
+
+      private static class SimpleBlockReader extends BlockReader { // serves one pre-built int[] as a single block
+        private final int[] ints;
+        private final int count; // number of valid entries in ints
+        private boolean done; // flipped to true by the first fill()
+
+        public SimpleBlockReader(int[] ints, int count) {
+          this.ints = ints;
+          this.count = count;
+        }
+
+        @Override
+        public int[] getBuffer() {
+          return ints;
+        }
+
+        @Override
+        public int fill() {
+          if (!done) {
+            done = true;
+            return count; // first call: report the whole array as one block
+          } else {
+            return 0; // every later call: nothing more to read
+          }
+        }
+
+        @Override
+        public int end() {
+          return done ? 0 : count; // NOTE(review): reports count entries even before fill() was called -- confirm this is intended
+        }
+
+        @Override
+        public int offset() {
+          return 0; // the single block always starts at index 0
+        }
+
+        @Override
+        public void setOffset(int offset) {
+          throw new UnsupportedOperationException();
+        }
+      }
     }
 
     private static class RAMDocsEnum extends DocsEnum {

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java Tue Dec 14 17:18:00 2010
@@ -2910,4 +2910,138 @@ public class TestIndexWriter extends Luc
 
     dir.close();
   }
+
+  public void testGrowingGaps() throws Exception { // postings for "one" have steadily widening doc gaps; checks nextDoc, advance, bulk scan and jump
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    //w.w.setInfoStream(System.out);
+    Document doc = new Document();
+    Field f = newField(random, "field", "two", Field.Store.NO, Field.Index.ANALYZED);
+    doc.add(f);
+    final int NUM_GAPS = 100;
+    for(int i=0;i<NUM_GAPS;i++) { // each pass adds one "one" doc followed by i+1 "two" docs
+      f.setValue("one");
+      w.addDocument(doc);
+      f.setValue("two");
+      for(int j=0;j<1+i;j++) {
+        w.addDocument(doc);
+      }
+    }
+
+    // MultiBulkPostingsEnum doesn't jump (yet):
+    w.optimize();
+
+    IndexReader r = w.getReader();
+    w.close();
+
+    DocsEnum docs = MultiFields.getTermDocsEnum(r,
+                                                MultiFields.getDeletedDocs(r),
+                                                "field",
+                                                new BytesRef("one"));
+    // test simple linear scan:
+    int[] docIDs = new int[r.maxDoc()]; // sized to maxDoc; the scan below is expected to fill exactly NUM_GAPS slots
+    int upto = 0;
+    int docID;
+    int expDocID = 0;
+    int gap = 2; // expected distance to the next "one" doc; grows by 1 after every hit
+    while((docID = docs.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
+      //System.out.println("  got doc=" + docID + " ord=" + upto);
+      docIDs[upto++] = docID;
+      assertEquals(expDocID, docID);
+      expDocID += gap;
+      gap++;
+    }
+    assertEquals(NUM_GAPS, upto);
+
+    final int maxDoc = r.maxDoc(); // NOTE(review): not referenced again in this method
+
+    // test advance:
+    for(int i=0;i<NUM_GAPS;i++) { // fresh enum per target so every advance starts from the beginning
+      docs = MultiFields.getTermDocsEnum(r,
+                                         MultiFields.getDeletedDocs(r),
+                                         "field",
+                                         new BytesRef("one"));
+      //System.out.println("  .advance(" + docIDs[i] + ")");
+      assertEquals(docIDs[i], docs.advance(docIDs[i])); // the target is itself a matching doc, so advance must land exactly on it
+      for(int j=i+1;j<NUM_GAPS;j++) {
+        assertEquals(docIDs[j], docs.nextDoc());
+      }
+      assertEquals(DocsEnum.NO_MORE_DOCS, docs.nextDoc());
+    }
+
+    assertEquals(NUM_GAPS, r.docFreq("field", new BytesRef("one")));
+
+    BulkPostingsEnum bulkPostings = MultiFields.getBulkPostingsEnum(r,
+                                                                    "field",
+                                                                    new BytesRef("one"),
+                                                                    false,
+                                                                    false);
+
+    // test simple linear scan using BulkPostingsEnum:
+    BulkPostingsEnum.BlockReader docDeltasReader = bulkPostings.getDocDeltasReader();
+    int[] docDeltas = docDeltasReader.getBuffer();
+    int docDeltaUpto = docDeltasReader.offset();
+    int docDeltaMax = docDeltasReader.end();
+    if (docDeltaUpto >= docDeltaMax) { // nothing buffered yet: load the first block
+      docDeltaMax = docDeltasReader.fill();
+    }
+    docID = 0;
+    for(int i=0;i<NUM_GAPS;i++) {
+      if (docDeltaUpto == docDeltaMax) { // current block exhausted: refill and restart at index 0
+        docDeltaUpto = 0;
+        docDeltaMax = docDeltasReader.fill();
+      }
+      assertTrue(docDeltas[docDeltaUpto] > 0 || i==0); // only the very first delta may be 0
+      docID += docDeltas[docDeltaUpto++];
+      assertEquals(docID, docIDs[i]);
+    }
+
+    // nocommit test reuse too
+    // test jump using BulkPostingsEnum:
+    boolean didJump = false;
+    for(int i=0;i<NUM_GAPS;i++) {
+      //System.out.println("GAP i=" + i);
+      bulkPostings = MultiFields.getBulkPostingsEnum(r,
+                                                     "field",
+                                                     new BytesRef("one"),
+                                                     false,
+                                                     false);
+      //System.out.println("try jump " + docIDs[i]);
+      final BulkPostingsEnum.JumpResult jr = bulkPostings.jump(docIDs[i], 0); // fresh enum, so the current count passed in is 0
+      int count;
+      if (jr != null) {
+        //System.out.println("  got jump!");
+        didJump = true;
+        assertEquals("jump to docID=" + docID + " got count=" + jr.count + " docID=" + jr.docID, docIDs[jr.count-1], jr.docID); // NOTE(review): docID in the message is left over from the previous pass, not the jump target docIDs[i]
+        docID = jr.docID;
+        count = jr.count;
+      } else {
+        //System.out.println("  no jump!");
+        docID = 0; // no jump: scan from the start of the postings
+        count = 0;
+      }
+      docDeltasReader = bulkPostings.getDocDeltasReader();
+      docDeltas = docDeltasReader.getBuffer();
+      docDeltaUpto = docDeltasReader.offset();
+      docDeltaMax = docDeltasReader.end();
+      if (docDeltaUpto >= docDeltaMax) {
+        docDeltaMax = docDeltasReader.fill();
+        //System.out.println("  do pre-fill");
+      }
+      for(int j=count;j<NUM_GAPS;j++) { // replay the remaining deltas and compare against the linear-scan docIDs
+        //System.out.println("  GAP j=" + j);
+        if (docDeltaUpto >= docDeltaMax) {
+          docDeltaUpto = 0;
+          docDeltaMax = docDeltasReader.fill();
+        }
+        //System.out.println("  docUpto=" + docDeltaUpto + " delta=" + docDeltas[docDeltaUpto]);
+        docID += docDeltas[docDeltaUpto++];
+        assertEquals(docIDs[j], docID);
+      }
+    }
+    assertTrue(CodecProvider.getDefault().getFieldCodec("field").equals("SimpleText") || didJump); // only the SimpleText codec is allowed to go without a single jump
+    
+    r.close();
+    dir.close();
+  }
 }

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/intblock/TestIntBlockCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/intblock/TestIntBlockCodec.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/intblock/TestIntBlockCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/intblock/TestIntBlockCodec.java Tue Dec 14 17:18:00 2010
@@ -18,6 +18,7 @@ package org.apache.lucene.index.codecs.i
  */
 
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.index.*;
 import org.apache.lucene.store.*;
 import org.apache.lucene.index.codecs.sep.*;
 import org.apache.lucene.index.codecs.mockintblock.*;
@@ -36,10 +37,20 @@ public class TestIntBlockCodec extends L
     out.close();
 
     IntIndexInput in = f.openInput(dir, "test");
-    IntIndexInput.Reader r = in.reader();
+    BulkPostingsEnum.BlockReader r = in.reader();
+
+    final int[] buffer = r.getBuffer();
+    int pointer = 0;
+    int pointerMax = r.fill();
+    assertTrue(pointerMax > 0);
 
     for(int i=0;i<11777;i++) {
-      assertEquals(i, r.next());
+      assertEquals(i, buffer[pointer++]);
+      if (pointer == pointerMax) {
+        pointerMax = r.fill();
+        assertTrue(pointerMax > 0);
+        pointer = 0;
+      }
     }
     in.close();
     

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java?rev=1049178&r1=1049177&r2=1049178&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java Tue Dec 14 17:18:00 2010
@@ -23,6 +23,7 @@ import org.apache.lucene.store.Directory
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.CodecUtil;
 import org.apache.lucene.index.codecs.sep.IntIndexInput;
+import org.apache.lucene.index.BulkPostingsEnum;
 
 /** Reads IndexInputs written with {@link
  *  SingleIntIndexOutput}.  NOTE: this class is just for
@@ -52,18 +53,41 @@ public class MockSingleIntIndexInput ext
     in.close();
   }
 
-  public static class Reader extends IntIndexInput.Reader {
+  public static class Reader extends BulkPostingsEnum.BlockReader {
     // clone:
     private final IndexInput in;
+    private int offset;
+    private final int[] buffer = new int[1];
 
     public Reader(IndexInput in) {
       this.in = in;
     }
 
-    /** Reads next single int */
     @Override
-    public int next() throws IOException {
-      return in.readVInt();
+    public int[] getBuffer() {
+      return buffer;
+    }
+
+    @Override
+    public int offset() {
+      return offset;
+    }
+
+    @Override
+    public void setOffset(int offset) {
+      this.offset = offset;
+    }
+
+    @Override
+    public int end() {
+      return 1;
+    }
+
+    @Override
+    public int fill() throws IOException {
+      buffer[0] = in.readVInt();
+      offset = 0;
+      return 1;
     }
   }
   
@@ -81,7 +105,7 @@ public class MockSingleIntIndexInput ext
     }
 
     @Override
-    public void read(IntIndexInput.Reader indexIn, boolean absolute)
+    public void read(BulkPostingsEnum.BlockReader indexIn, boolean absolute)
       throws IOException {
       if (absolute) {
         fp = indexIn.readVLong();
@@ -96,8 +120,9 @@ public class MockSingleIntIndexInput ext
     }
 
     @Override
-    public void seek(IntIndexInput.Reader other) throws IOException {
+    public void seek(BulkPostingsEnum.BlockReader other) throws IOException {
       ((Reader) other).in.seek(fp);
+      other.fill();
     }
 
     @Override



Mime
View raw message