lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r1058390 [4/16] - in /lucene/dev/branches/bulkpostings: ./ dev-tools/ dev-tools/eclipse/ dev-tools/idea/ dev-tools/idea/.idea/ dev-tools/idea/.idea/libraries/ dev-tools/idea/lucene/ dev-tools/idea/lucene/contrib/ dev-tools/idea/lucene/contr...
Date Thu, 13 Jan 2011 02:09:56 GMT
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java Thu Jan 13 02:09:33 2011
@@ -59,17 +59,10 @@ final class TermVectorsTermsWriter exten
       tvx = tvd = tvf = null;
       assert state.segmentName != null;
       String idxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION);
-      String fldName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_FIELDS_EXTENSION);
-      String docName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
-
       if (4 + ((long) state.numDocs) * 16 != state.directory.fileLength(idxName)) {
         throw new RuntimeException("after flush: tvx size mismatch: " + state.numDocs + " docs vs " + state.directory.fileLength(idxName) + " length in bytes of " + idxName + " file exists?=" + state.directory.fileExists(idxName));
       }
 
-      state.flushedFiles.add(idxName);
-      state.flushedFiles.add(fldName);
-      state.flushedFiles.add(docName);
-
       lastDocID = 0;
       state.hasVectors = hasVectors;
       hasVectors = false;

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesReader.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesReader.java Thu Jan 13 02:09:33 2011
@@ -36,13 +36,17 @@ final class DeltaBytesReader {
     term.copy(text);
   }
 
-  void read() throws IOException {
+  boolean read() throws IOException {
     final int start = in.readVInt();
+    if (start == DeltaBytesWriter.TERM_EOF) {
+      return false;
+    }
     final int suffix = in.readVInt();
     assert start <= term.length: "start=" + start + " length=" + term.length;
     final int newLength = start+suffix;
     term.grow(newLength);
     in.readBytes(term.bytes, start, suffix);
     term.length = newLength;
+    return true;
   }
 }

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesWriter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesWriter.java Thu Jan 13 02:09:33 2011
@@ -20,11 +20,18 @@ package org.apache.lucene.index.codecs;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.BytesRef;
+import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
 
 import java.io.IOException;
 
 final class DeltaBytesWriter {
 
+  // Must be bigger than
+  // DocumentsWriter.MAX_TERM_LENGTH_UTF8.  If you change
+  // this it's an index format change, so that change must be
+  // versioned:
+  final static int TERM_EOF = BYTE_BLOCK_SIZE;
+
   private byte[] lastBytes = new byte[10];
   private int lastLength;
   final IndexOutput out;
@@ -45,8 +52,9 @@ final class DeltaBytesWriter {
 
     final int limit = length < lastLength ? length : lastLength;
     while(start < limit) {
-      if (bytes[upto] != lastBytes[start])
+      if (bytes[upto] != lastBytes[start]) {
         break;
+      }
       start++;
       upto++;
     }

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java Thu Jan 13 02:09:33 2011
@@ -33,29 +33,6 @@ import java.util.Collection;
 import java.util.Comparator;
 import java.io.IOException;
 
-/**
- * Uses a simplistic format to record terms dict index
- * information.  Limititations:
- *
- *   - Index for all fields is loaded entirely into RAM up
- *     front 
- *   - Index is stored in RAM using shared byte[] that
- *     wastefully expand every term.  Using FST to share
- *     common prefix & suffix would save RAM.
- *   - Index is taken at regular numTerms (every 128 by
- *     default); might be better to do it by "net docFreqs"
- *     encountered, so that for spans of low-freq terms we
- *     take index less often.
- *
- * A better approach might be something similar to how
- * postings are encoded, w/ multi-level skips.  Ie, load all
- * terms index data into memory, as a single large compactly
- * encoded stream (eg delta bytes + delta offset).  Index
- * that w/ multi-level skipper.  Then to look up a term is
- * the equivalent binary search, using the skipper instead,
- * while data remains compressed in memory.
- */
-
 import org.apache.lucene.index.IndexFileNames;
 
 /** @lucene.experimental */
@@ -74,7 +51,7 @@ public class FixedGapTermsIndexReader ex
   final private int indexInterval;
 
   // Closed if indexLoaded is true:
-  final private IndexInput in;
+  private IndexInput in;
   private volatile boolean indexLoaded;
 
   private final Comparator<BytesRef> termComp;
@@ -85,7 +62,7 @@ public class FixedGapTermsIndexReader ex
   private final PagedBytes termBytes = new PagedBytes(PAGED_BYTES_BITS);
   private PagedBytes.Reader termBytesReader;
 
-  final HashMap<FieldInfo,FieldIndexReader> fields = new HashMap<FieldInfo,FieldIndexReader>();
+  final HashMap<FieldInfo,FieldIndexData> fields = new HashMap<FieldInfo,FieldIndexData>();
   
   // start of the field info data
   protected long dirOffset;
@@ -95,7 +72,7 @@ public class FixedGapTermsIndexReader ex
 
     this.termComp = termComp;
 
-    IndexInput in = dir.openInput(IndexFileNames.segmentFileName(segment, codecId, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION));
+    in = dir.openInput(IndexFileNames.segmentFileName(segment, codecId, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION));
     
     boolean success = false;
 
@@ -116,49 +93,137 @@ public class FixedGapTermsIndexReader ex
       seekDir(in, dirOffset);
 
       // Read directory
-      final int numFields = in.readInt();
-
+      final int numFields = in.readVInt();      
       for(int i=0;i<numFields;i++) {
-        final int field = in.readInt();
-        final int numIndexTerms = in.readInt();
-        final long termsStart = in.readLong();
-        final long indexStart = in.readLong();
-        final long packedIndexStart = in.readLong();
-        final long packedOffsetsStart = in.readLong();
+        final int field = in.readVInt();
+        final int numIndexTerms = in.readVInt();
+        final long termsStart = in.readVLong();
+        final long indexStart = in.readVLong();
+        final long packedIndexStart = in.readVLong();
+        final long packedOffsetsStart = in.readVLong();
         assert packedIndexStart >= indexStart: "packedStart=" + packedIndexStart + " indexStart=" + indexStart + " numIndexTerms=" + numIndexTerms + " seg=" + segment;
-        if (numIndexTerms > 0) {
-          final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
-          fields.put(fieldInfo, new FieldIndexReader(in, fieldInfo, numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart));
-        }
+        final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+        fields.put(fieldInfo, new FieldIndexData(fieldInfo, numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart));
       }
       success = true;
     } finally {
       if (indexDivisor > 0) {
         in.close();
-        this.in = null;
+        in = null;
         if (success) {
           indexLoaded = true;
         }
         termBytesReader = termBytes.freeze(true);
-      } else {
-        this.in = in;
       }
     }
   }
   
+  @Override
+  public int getDivisor() {
+    return indexDivisor;
+  }
+
   protected void readHeader(IndexInput input) throws IOException {
     CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME,
       FixedGapTermsIndexWriter.VERSION_START, FixedGapTermsIndexWriter.VERSION_START);
     dirOffset = input.readLong();
   }
 
-  private final class FieldIndexReader extends FieldReader {
+  private class IndexEnum extends FieldIndexEnum {
+    private final FieldIndexData.CoreFieldIndex fieldIndex;
+    private final BytesRef term = new BytesRef();
+    private final BytesRef nextTerm = new BytesRef();
+    private long ord;
 
-    final private FieldInfo fieldInfo;
+    public IndexEnum(FieldIndexData.CoreFieldIndex fieldIndex) {
+      this.fieldIndex = fieldIndex;
+    }
+
+    @Override
+    public BytesRef term() {
+      return term;
+    }
+
+    @Override
+    public long seek(BytesRef target) {
+      int lo = 0;				  // binary search
+      int hi = fieldIndex.numIndexTerms - 1;
+      assert totalIndexInterval > 0 : "totalIndexInterval=" + totalIndexInterval;
+
+      while (hi >= lo) {
+        int mid = (lo + hi) >>> 1;
+
+        final long offset = fieldIndex.termOffsets.get(mid);
+        final int length = (int) (fieldIndex.termOffsets.get(1+mid) - offset);
+        termBytesReader.fillSlice(term, fieldIndex.termBytesStart + offset, length);
+
+        int delta = termComp.compare(target, term);
+        if (delta < 0) {
+          hi = mid - 1;
+        } else if (delta > 0) {
+          lo = mid + 1;
+        } else {
+          assert mid >= 0;
+          ord = mid*totalIndexInterval;
+          return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(mid);
+        }
+      }
 
-    private volatile CoreFieldIndex coreIndex;
+      if (hi < 0) {
+        assert hi == -1;
+        hi = 0;
+      }
 
-    private final IndexInput in;
+      final long offset = fieldIndex.termOffsets.get(hi);
+      final int length = (int) (fieldIndex.termOffsets.get(1+hi) - offset);
+      termBytesReader.fillSlice(term, fieldIndex.termBytesStart + offset, length);
+
+      ord = hi*totalIndexInterval;
+      return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(hi);
+    }
+
+    @Override
+    public long next() {
+      final int idx = 1 + (int) (ord / totalIndexInterval);
+      if (idx >= fieldIndex.numIndexTerms) {
+        return -1;
+      }
+      ord += totalIndexInterval;
+
+      final long offset = fieldIndex.termOffsets.get(idx);
+      final int length = (int) (fieldIndex.termOffsets.get(1+idx) - offset);
+      termBytesReader.fillSlice(nextTerm, fieldIndex.termBytesStart + offset, length);
+      return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(idx);
+    }
+
+    @Override
+    public long ord() {
+      return ord;
+    }
+
+    @Override
+    public long seek(long ord) {
+      int idx = (int) (ord / totalIndexInterval);
+      // caller must ensure ord is in bounds
+      assert idx < fieldIndex.numIndexTerms;
+      final long offset = fieldIndex.termOffsets.get(idx);
+      final int length = (int) (fieldIndex.termOffsets.get(1+idx) - offset);
+      termBytesReader.fillSlice(term, fieldIndex.termBytesStart + offset, length);
+      this.ord = idx * totalIndexInterval;
+      return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(idx);
+    }
+  }
+
+  @Override
+  public boolean supportsOrd() {
+    return true;
+  }
+
+  private final class FieldIndexData {
+
+    final private FieldInfo fieldInfo;
+
+    volatile CoreFieldIndex coreIndex;
 
     private final long indexStart;
     private final long termsStart;
@@ -167,11 +232,10 @@ public class FixedGapTermsIndexReader ex
 
     private final int numIndexTerms;
 
-    public FieldIndexReader(IndexInput in, FieldInfo fieldInfo, int numIndexTerms, long indexStart, long termsStart, long packedIndexStart,
-                            long packedOffsetsStart) throws IOException {
+    public FieldIndexData(FieldInfo fieldInfo, int numIndexTerms, long indexStart, long termsStart, long packedIndexStart,
+                          long packedOffsetsStart) throws IOException {
 
       this.fieldInfo = fieldInfo;
-      this.in = in;
       this.termsStart = termsStart;
       this.indexStart = indexStart;
       this.packedIndexStart = packedIndexStart;
@@ -182,12 +246,7 @@ public class FixedGapTermsIndexReader ex
       // is -1, so that PrefixCodedTermsReader can call
       // isIndexTerm for each field:
       if (indexDivisor > 0) {
-        coreIndex = new CoreFieldIndex(indexStart,
-                                       termsStart,
-                                       packedIndexStart,
-                                       packedOffsetsStart,
-                                       numIndexTerms);
-      
+        loadTermsIndex();
       }
     }
 
@@ -197,46 +256,11 @@ public class FixedGapTermsIndexReader ex
       }
     }
 
-    @Override
-    public boolean isIndexTerm(long ord, int docFreq, boolean onlyLoaded) {
-      if (onlyLoaded) {
-        return ord % totalIndexInterval == 0;
-      } else {
-        return ord % indexInterval == 0;
-      }
-    }
-
-    @Override
-    public boolean nextIndexTerm(long ord, TermsIndexResult result) throws IOException {
-      if (coreIndex == null) {
-        throw new IllegalStateException("terms index was not loaded");
-      } else {
-        return coreIndex.nextIndexTerm(ord, result);
-      }
-    }
-
-    @Override
-    public void getIndexOffset(BytesRef term, TermsIndexResult result) throws IOException {
-      // You must call loadTermsIndex if you had specified -1 for indexDivisor
-      if (coreIndex == null) {
-        throw new IllegalStateException("terms index was not loaded");
-      }
-      coreIndex.getIndexOffset(term, result);
-    }
-
-    @Override
-    public void getIndexOffset(long ord, TermsIndexResult result) throws IOException {
-      // You must call loadTermsIndex if you had specified
-      // indexDivisor < 0 to ctor
-      if (coreIndex == null) {
-        throw new IllegalStateException("terms index was not loaded");
-      }
-      coreIndex.getIndexOffset(ord, result);
-    }
-
     private final class CoreFieldIndex {
 
-      final private long termBytesStart;
+      // where this field's terms begin in the packed byte[]
+      // data
+      final long termBytesStart;
 
       // offset into index termBytes
       final PackedInts.Reader termOffsets;
@@ -245,7 +269,6 @@ public class FixedGapTermsIndexReader ex
       final PackedInts.Reader termsDictOffsets;
 
       final int numIndexTerms;
-
       final long termsStart;
 
       public CoreFieldIndex(long indexStart, long termsStart, long packedIndexStart, long packedOffsetsStart, int numIndexTerms) throws IOException {
@@ -315,7 +338,6 @@ public class FixedGapTermsIndexReader ex
               termsDictOffsetsM.set(upto, termsDictOffsetsIter.next());
 
               termOffsetsM.set(upto, termOffsetUpto);
-              upto++;
 
               long termOffset = termOffsetsIter.next();
               long nextTermOffset = termOffsetsIter.next();
@@ -328,6 +350,11 @@ public class FixedGapTermsIndexReader ex
               termBytes.copy(clone, numTermBytes);
               termOffsetUpto += numTermBytes;
 
+              upto++;
+              if (upto == this.numIndexTerms) {
+                break;
+              }
+
               // skip terms:
               termsDictOffsetsIter.next();
               for(int i=0;i<indexDivisor-2;i++) {
@@ -344,71 +371,10 @@ public class FixedGapTermsIndexReader ex
           }
         }
       }
-
-      public boolean nextIndexTerm(long ord, TermsIndexResult result) throws IOException {
-        int idx = 1 + (int) (ord / totalIndexInterval);
-        if (idx < numIndexTerms) {
-          fillResult(idx, result);
-          return true;
-        } else {
-          return false;
-        }
-      }
-
-      private void fillResult(int idx, TermsIndexResult result) {
-        final long offset = termOffsets.get(idx);
-        final int length = (int) (termOffsets.get(1+idx) - offset);
-        termBytesReader.fillSlice(result.term, termBytesStart + offset, length);
-        result.position = idx * totalIndexInterval;
-        result.offset = termsStart + termsDictOffsets.get(idx);
-      }
-
-      public void getIndexOffset(BytesRef term, TermsIndexResult result) throws IOException {
-        int lo = 0;					  // binary search
-        int hi = numIndexTerms - 1;
-        assert totalIndexInterval > 0 : "totalIndexInterval=" + totalIndexInterval;
-
-        while (hi >= lo) {
-          int mid = (lo + hi) >>> 1;
-
-          final long offset = termOffsets.get(mid);
-          final int length = (int) (termOffsets.get(1+mid) - offset);
-          termBytesReader.fillSlice(result.term, termBytesStart + offset, length);
-
-          int delta = termComp.compare(term, result.term);
-          if (delta < 0) {
-            hi = mid - 1;
-          } else if (delta > 0) {
-            lo = mid + 1;
-          } else {
-            assert mid >= 0;
-            result.position = mid*totalIndexInterval;
-            result.offset = termsStart + termsDictOffsets.get(mid);
-            return;
-          }
-        }
-        if (hi < 0) {
-          assert hi == -1;
-          hi = 0;
-        }
-
-        final long offset = termOffsets.get(hi);
-        final int length = (int) (termOffsets.get(1+hi) - offset);
-        termBytesReader.fillSlice(result.term, termBytesStart + offset, length);
-
-        result.position = hi*totalIndexInterval;
-        result.offset = termsStart + termsDictOffsets.get(hi);
-      }
-
-      public void getIndexOffset(long ord, TermsIndexResult result) throws IOException {
-        int idx = (int) (ord / totalIndexInterval);
-        // caller must ensure ord is in bounds
-        assert idx < numIndexTerms;
-        fillResult(idx, result);
-      }
     }
   }
 
+  // Externally synced in IndexWriter
   @Override
   public void loadTermsIndex(int indexDivisor) throws IOException {
     if (!indexLoaded) {
@@ -420,7 +386,7 @@ public class FixedGapTermsIndexReader ex
       }
       this.totalIndexInterval = indexInterval * this.indexDivisor;
 
-      Iterator<FieldIndexReader> it = fields.values().iterator();
+      Iterator<FieldIndexData> it = fields.values().iterator();
       while(it.hasNext()) {
         it.next().loadTermsIndex();
       }
@@ -432,8 +398,13 @@ public class FixedGapTermsIndexReader ex
   }
 
   @Override
-  public FieldReader getField(FieldInfo fieldInfo) {
-    return fields.get(fieldInfo);
+  public FieldIndexEnum getFieldEnum(FieldInfo fieldInfo) {
+    final FieldIndexData fieldData = fields.get(fieldInfo);
+    if (fieldData.coreIndex == null) {
+      return null;
+    } else {
+      return new IndexEnum(fieldData.coreIndex);
+    }
   }
 
   public static void files(Directory dir, SegmentInfo info, String id, Collection<String> files) {

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java Thu Jan 13 02:09:33 2011
@@ -31,7 +31,14 @@ import java.util.List;
 import java.util.ArrayList;
 import java.io.IOException;
 
-/** @lucene.experimental */
+/**
+ * Selects every Nth term as an index term, and holds term
+ * bytes fully expanded in memory.  This terms index
+ * supports seeking by ord.  See {@link
+ * VariableGapTermsIndexWriter} for a more memory efficient
+ * terms index that does not support seeking by ord.
+ *
+ * @lucene.experimental */
 public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
   protected final IndexOutput out;
 
@@ -50,7 +57,6 @@ public class FixedGapTermsIndexWriter ex
 
   public FixedGapTermsIndexWriter(SegmentWriteState state) throws IOException {
     final String indexFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_INDEX_EXTENSION);
-    state.flushedFiles.add(indexFileName);
     termIndexInterval = state.termIndexInterval;
     out = state.directory.createOutput(indexFileName);
     fieldInfos = state.fieldInfos;
@@ -203,15 +209,25 @@ public class FixedGapTermsIndexWriter ex
     final long dirStart = out.getFilePointer();
     final int fieldCount = fields.size();
 
-    out.writeInt(fieldCount);
+    int nonNullFieldCount = 0;
     for(int i=0;i<fieldCount;i++) {
       SimpleFieldWriter field = fields.get(i);
-      out.writeInt(field.fieldInfo.number);
-      out.writeInt(field.numIndexTerms);
-      out.writeLong(field.termsStart);
-      out.writeLong(field.indexStart);
-      out.writeLong(field.packedIndexStart);
-      out.writeLong(field.packedOffsetsStart);
+      if (field.numIndexTerms > 0) {
+        nonNullFieldCount++;
+      }
+    }
+
+    out.writeVInt(nonNullFieldCount);
+    for(int i=0;i<fieldCount;i++) {
+      SimpleFieldWriter field = fields.get(i);
+      if (field.numIndexTerms > 0) {
+        out.writeVInt(field.fieldInfo.number);
+        out.writeVInt(field.numIndexTerms);
+        out.writeVLong(field.termsStart);
+        out.writeVLong(field.indexStart);
+        out.writeVLong(field.packedIndexStart);
+        out.writeVLong(field.packedOffsetsStart);
+      }
     }
     writeTrailer(dirStart);
     out.close();

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java Thu Jan 13 02:09:33 2011
@@ -141,12 +141,10 @@ public class PrefixCodedTermsReader exte
         final long numTerms = in.readLong();
         assert numTerms >= 0;
         final long termsStartPointer = in.readLong();
-        final TermsIndexReaderBase.FieldReader fieldIndexReader;
         final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
-        fieldIndexReader = indexReader.getField(fieldInfo);
         if (numTerms > 0) {
           assert !fields.containsKey(fieldInfo.name);
-          fields.put(fieldInfo.name, new FieldReader(fieldIndexReader, fieldInfo, numTerms, termsStartPointer));
+          fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer));
         }
       }
       success = true;
@@ -252,14 +250,12 @@ public class PrefixCodedTermsReader exte
     final long numTerms;
     final FieldInfo fieldInfo;
     final long termsStartPointer;
-    final TermsIndexReaderBase.FieldReader fieldIndexReader;
 
-    FieldReader(TermsIndexReaderBase.FieldReader fieldIndexReader, FieldInfo fieldInfo, long numTerms, long termsStartPointer) {
+    FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer) {
       assert numTerms > 0;
       this.fieldInfo = fieldInfo;
       this.numTerms = numTerms;
       this.termsStartPointer = termsStartPointer;
-      this.fieldIndexReader = fieldIndexReader;
     }
 
     @Override
@@ -282,18 +278,25 @@ public class PrefixCodedTermsReader exte
       return numTerms;
     }
 
-    // Iterates through terms in this field
+    // Iterates through terms in this field, not supporting ord()
     private class SegmentTermsEnum extends TermsEnum {
       private final IndexInput in;
       private final DeltaBytesReader bytesReader;
       private final TermState state;
       private boolean seekPending;
-      private final TermsIndexReaderBase.TermsIndexResult indexResult = new TermsIndexReaderBase.TermsIndexResult();
       private final FieldAndTerm fieldTerm = new FieldAndTerm();
+      private final TermsIndexReaderBase.FieldIndexEnum indexEnum;
+      private boolean positioned;
+      private boolean didIndexNext;
+      private BytesRef nextIndexTerm;
+      private boolean isIndexTerm;
+      private final boolean doOrd;
 
       SegmentTermsEnum() throws IOException {
         in = (IndexInput) PrefixCodedTermsReader.this.in.clone();
         in.seek(termsStartPointer);
+        indexEnum = indexReader.getFieldEnum(fieldInfo);
+        doOrd = indexReader.supportsOrd();
         bytesReader = new DeltaBytesReader(in);
         fieldTerm.field = fieldInfo.name;
         state = postingsReader.newTermState();
@@ -313,12 +316,41 @@ public class PrefixCodedTermsReader exte
                        stateCopy);
       }
 
+      // called only from assert
+      private boolean first;
+      private int indexTermCount;
+
+      private boolean startSeek() {
+        first = true;
+        indexTermCount = 0;
+        return true;
+      }
+
+      private boolean checkSeekScan() {
+        if (!first && isIndexTerm) {
+          indexTermCount++;
+          if (indexTermCount >= indexReader.getDivisor()) {
+            //System.out.println("now fail count=" + indexTermCount);
+            return false;
+          }
+        }
+        first = false;
+        return true;
+      }
+
       /** Seeks until the first term that's >= the provided
        *  text; returns SeekStatus.FOUND if the exact term
        *  is found, SeekStatus.NOT_FOUND if a different term
        *  was found, SeekStatus.END if we hit EOF */
       @Override
       public SeekStatus seek(BytesRef term, boolean useCache) throws IOException {
+
+        if (indexEnum == null) {
+          throw new IllegalStateException("terms index was not loaded");
+        }
+        
+        //System.out.println("te.seek term=" + fieldInfo.name + ":" + term.utf8ToString() + " current=" + term().utf8ToString() + " useCache=" + useCache + " this="  + this);
+
         // Check cache
         TermState cachedState;
         if (useCache) {
@@ -327,7 +359,9 @@ public class PrefixCodedTermsReader exte
           if (cachedState != null) {
             state.copy(cachedState);
             seekPending = true;
+            positioned = false;
             bytesReader.term.copy(term);
+            //System.out.println("  cached!");
             return SeekStatus.FOUND;
           }
         } else {
@@ -336,36 +370,54 @@ public class PrefixCodedTermsReader exte
 
         boolean doSeek = true;
 
-        if (state.ord != -1) {
-          // we are positioned
+        if (positioned) {
 
           final int cmp = termComp.compare(bytesReader.term, term);
 
           if (cmp == 0) {
             // already at the requested term
             return SeekStatus.FOUND;
-          }
+          } else if (cmp < 0) {
+
+            if (seekPending) {
+              seekPending = false;
+              in.seek(state.filePointer);
+              indexEnum.seek(bytesReader.term);
+              didIndexNext = false;
+            }
+
+            // Target term is after current term
+            if (!didIndexNext) {
+              if (indexEnum.next() == -1) {
+                nextIndexTerm = null;
+              } else {
+                nextIndexTerm = indexEnum.term();
+              }
+              //System.out.println("  now do index next() nextIndexTerm=" + (nextIndexTerm == null ? "null" : nextIndexTerm.utf8ToString()));
+              didIndexNext = true;
+            }
 
-          if (cmp < 0 &&
-              fieldIndexReader.nextIndexTerm(state.ord, indexResult) &&
-              termComp.compare(indexResult.term, term) > 0) {
-            // Optimization: requested term is within the
-            // same index block we are now in; skip seeking
-            // (but do scanning):
-            doSeek = false;
+            if (nextIndexTerm == null || termComp.compare(term, nextIndexTerm) < 0) {
+              // Optimization: requested term is within the
+              // same index block we are now in; skip seeking
+              // (but do scanning):
+              doSeek = false;
+              //System.out.println("  skip seek: nextIndexTerm=" + nextIndexTerm);
+            }
           }
         }
 
-        // Used only for assert:
-        final long startOrd;
-
         if (doSeek) {
 
-          // As index to find biggest index term that's <=
-          // our text:
-          fieldIndexReader.getIndexOffset(term, indexResult);
+          positioned = true;
 
-          in.seek(indexResult.offset);
+          // Ask terms index to find biggest index term that's <=
+          // our text:
+          in.seek(indexEnum.seek(term));
+          didIndexNext = false;
+          if (doOrd) {
+            state.ord = indexEnum.ord()-1;
+          }
           seekPending = false;
 
           // NOTE: the first next() after an index seek is
@@ -374,21 +426,21 @@ public class PrefixCodedTermsReader exte
           // those bytes in the primary file, but then when
           // scanning over an index term we'd have to
           // special case it:
-          bytesReader.reset(indexResult.term);
-          
-          state.ord = indexResult.position-1;
-          assert state.ord >= -1: "ord=" + state.ord + " pos=" + indexResult.position;
-
-          startOrd = indexResult.position;
+          bytesReader.reset(indexEnum.term());
+          //System.out.println("  doSeek term=" + indexEnum.term().utf8ToString() + " vs target=" + term.utf8ToString());
         } else {
-          startOrd = -1;
+          //System.out.println("  skip seek");
         }
 
+        assert startSeek();
+
         // Now scan:
-        while(next() != null) {
+        while (next() != null) {
           final int cmp = termComp.compare(bytesReader.term, term);
           if (cmp == 0) {
-            if (doSeek && useCache) {
+
+            // Done!
+            if (useCache) {
               // Store in cache
               FieldAndTerm entryKey = new FieldAndTerm(fieldTerm);
               cachedState = (TermState) state.clone();
@@ -396,93 +448,62 @@ public class PrefixCodedTermsReader exte
               cachedState.filePointer = in.getFilePointer();
               termsCache.put(entryKey, cachedState);
             }
+
             return SeekStatus.FOUND;
           } else if (cmp > 0) {
             return SeekStatus.NOT_FOUND;
           }
+
           // The purpose of the terms dict index is to seek
           // the enum to the closest index term before the
           // term we are looking for.  So, we should never
           // cross another index term (besides the first
           // one) while we are scanning:
-          assert state.ord == startOrd || !fieldIndexReader.isIndexTerm(state.ord, state.docFreq, true): "state.ord=" + state.ord + " startOrd=" + startOrd + " ir.isIndexTerm=" + fieldIndexReader.isIndexTerm(state.ord, state.docFreq, true) + " state.docFreq=" + state.docFreq;
+          assert checkSeekScan();
         }
 
+        positioned = false;
         return SeekStatus.END;
       }
 
       @Override
-      public SeekStatus seek(long ord) throws IOException {
-
-        // TODO: should we cache term lookup by ord as well...?
-
-        if (ord >= numTerms) {
-          state.ord = numTerms-1;
-          return SeekStatus.END;
-        }
-
-        fieldIndexReader.getIndexOffset(ord, indexResult);
-        in.seek(indexResult.offset);
-        seekPending = false;
-
-        // NOTE: the first next() after an index seek is
-        // wasteful, since it redundantly reads the same
-        // bytes into the buffer
-        bytesReader.reset(indexResult.term);
-
-        state.ord = indexResult.position-1;
-        assert state.ord >= -1: "ord=" + state.ord;
-
-        // Now, scan:
-        int left = (int) (ord - state.ord);
-        while(left > 0) {
-          final BytesRef term = next();
-          assert term != null;
-          left--;
-        }
-
-        // always found
-        return SeekStatus.FOUND;
-      }
-
-      @Override
       public BytesRef term() {
         return bytesReader.term;
       }
 
       @Override
-      public long ord() {
-        return state.ord;
-      }
-
-      @Override
       public BytesRef next() throws IOException {
 
         if (seekPending) {
           seekPending = false;
           in.seek(state.filePointer);
+          indexEnum.seek(bytesReader.term);
+          didIndexNext = false;
         }
         
-        if (state.ord >= numTerms-1) {
+        if (!bytesReader.read()) {
+          //System.out.println("te.next end!");
+          positioned = false;
           return null;
         }
 
-        bytesReader.read();
-        state.docFreq = in.readVInt();
+        final byte b = in.readByte();
+        isIndexTerm = (b & 0x80) != 0;
+
+        if ((b & 0x40) == 0) {
+          // Fast case -- docFreq fits in 6 bits
+          state.docFreq = b & 0x3F;
+        } else {
+          state.docFreq = (in.readVInt() << 6) | (b & 0x3F);
+        }
 
-        // TODO: would be cleaner, but space-wasting, to
-        // simply record a bit into each index entry as to
-        // whether it's an index entry or not, rather than
-        // re-compute that information... or, possibly store
-        // a "how many terms until next index entry" in each
-        // index entry, but that'd require some tricky
-        // lookahead work when writing the index
         postingsReader.readTerm(in,
                                 fieldInfo, state,
-                                fieldIndexReader.isIndexTerm(1+state.ord, state.docFreq, false));
-
+                                isIndexTerm);
         state.ord++;
+        positioned = true;
 
+        //System.out.println("te.next term=" + bytesReader.term.utf8ToString());
         return bytesReader.term;
       }
 
@@ -512,6 +533,50 @@ public class PrefixCodedTermsReader exte
           return postingsReader.docsAndPositions(fieldInfo, state, skipDocs, reuse);
         }
       }
+
+      @Override
+      public SeekStatus seek(long ord) throws IOException {
+
+        if (indexEnum == null) {
+          throw new IllegalStateException("terms index was not loaded");
+        }
+
+        if (ord >= numTerms) {
+          state.ord = numTerms-1;
+          return SeekStatus.END;
+        }
+
+        in.seek(indexEnum.seek(ord));
+        seekPending = false;
+        positioned = true;
+
+        // NOTE: the first next() after an index seek is
+        // wasteful, since it redundantly reads the same
+        // bytes into the buffer
+        bytesReader.reset(indexEnum.term());
+
+        state.ord = indexEnum.ord()-1;
+        assert state.ord >= -1: "ord=" + state.ord;
+
+        // Now, scan:
+        int left = (int) (ord - state.ord);
+        while(left > 0) {
+          final BytesRef term = next();
+          assert term != null;
+          left--;
+        }
+
+        // always found
+        return SeekStatus.FOUND;
+      }
+
+      @Override
+      public long ord() {
+        if (!doOrd) {
+          throw new UnsupportedOperationException();
+        }
+        return state.ord;
+      }
     }
   }
 }

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java Thu Jan 13 02:09:33 2011
@@ -74,7 +74,6 @@ public class PrefixCodedTermsWriter exte
     this.termComp = termComp;
     out = state.directory.createOutput(termsFileName);
     termsIndexWriter.setTermsOutput(out);
-    state.flushedFiles.add(termsFileName);
 
     fieldInfos = state.fieldInfos;
     writeHeader(out);
@@ -93,7 +92,7 @@ public class PrefixCodedTermsWriter exte
   }
 
   @Override
-  public TermsConsumer addField(FieldInfo field) {
+  public TermsConsumer addField(FieldInfo field) throws IOException {
     assert currentField == null || currentField.name.compareTo(field.name) < 0;
     currentField = field;
     TermsIndexWriterBase.FieldWriter fieldIndexWriter = termsIndexWriter.addField(field);
@@ -173,12 +172,25 @@ public class PrefixCodedTermsWriter exte
     public void finishTerm(BytesRef text, int numDocs) throws IOException {
 
       assert numDocs > 0;
+      //System.out.println("finishTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " fp="  + out.getFilePointer());
 
       final boolean isIndexTerm = fieldIndexWriter.checkIndexTerm(text, numDocs);
 
       termWriter.write(text);
-      out.writeVInt(numDocs);
+      final int highBit = isIndexTerm ? 0x80 : 0;
+      //System.out.println("  isIndex=" + isIndexTerm);
 
+      // This is a vInt, except, we steal top bit to record
+      // whether this was an indexed term:
+      if ((numDocs & ~0x3F) == 0) {
+        // Fast case -- docFreq fits in 6 bits
+        out.writeByte((byte) (highBit | numDocs));
+      } else {
+        // Write bottom 6 bits of docFreq, then write the
+        // remainder as vInt:
+        out.writeByte((byte) (highBit | 0x40 | (numDocs & 0x3F)));
+        out.writeVInt(numDocs >>> 6);
+      }
       postingsWriter.finishTerm(numDocs, isIndexTerm);
       numTerms++;
     }
@@ -186,6 +198,8 @@ public class PrefixCodedTermsWriter exte
     // Finishes all terms in this field
     @Override
     public void finish() throws IOException {
+      // EOF marker:
+      out.writeVInt(DeltaBytesWriter.TERM_EOF);
       fieldIndexWriter.finish();
     }
   }

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java Thu Jan 13 02:09:33 2011
@@ -21,6 +21,7 @@ import org.apache.lucene.index.FieldInfo
 import org.apache.lucene.util.BytesRef;
 
 import java.io.IOException;
+import java.io.Closeable;
 import java.util.Collection;
 
 
@@ -38,39 +39,37 @@ import java.util.Collection;
  * text. 
  * @lucene.experimental */
 
-public abstract class TermsIndexReaderBase {
+public abstract class TermsIndexReaderBase implements Closeable {
 
-  static class TermsIndexResult {
-    long position;
-    final BytesRef term = new BytesRef();
-    long offset;
-  };
-
-  public abstract class FieldReader {
-    /** Returns position of "largest" index term that's <=
-     *  text.  Returned TermsIndexResult may be reused
-     *  across calls.  This resets internal state, and
-     *  expects that you'll then scan the file and
-     *  sequentially call isIndexTerm for each term
-     *  encountered. */
-    public abstract void getIndexOffset(BytesRef term, TermsIndexResult result) throws IOException;
-
-    public abstract void getIndexOffset(long ord, TermsIndexResult result) throws IOException;
-
-    /** Call this sequentially for each term encountered,
-     *  after calling {@link #getIndexOffset}. */
-    public abstract boolean isIndexTerm(long ord, int docFreq, boolean onlyLoaded) throws IOException;
-
-    /** Finds the next index term, after the specified
-     *  ord.  Returns true if one exists.  */
-    public abstract boolean nextIndexTerm(long ord, TermsIndexResult result) throws IOException;
-  }
-
-  public abstract FieldReader getField(FieldInfo fieldInfo);
+  public abstract FieldIndexEnum getFieldEnum(FieldInfo fieldInfo);
 
   public abstract void loadTermsIndex(int indexDivisor) throws IOException;
 
   public abstract void close() throws IOException;
 
   public abstract void getExtensions(Collection<String> extensions);
-}
\ No newline at end of file
+
+  public abstract boolean supportsOrd();
+
+  public abstract int getDivisor();
+
+  // Similar to TermsEnum, except, the only "metadata" it
+  // reports for a given indexed term is the long fileOffset
+  // into the main terms dict (_X.tis) file:
+  public static abstract class FieldIndexEnum {
+
+    /** Seeks to "largest" indexed term that's <=
+     *  term; returns file pointer index (into the main
+     *  terms dict file) for that term */
+    public abstract long seek(BytesRef term) throws IOException;
+
+    /** Returns -1 at end */
+    public abstract long next() throws IOException;
+
+    public abstract BytesRef term();
+
+    // Only impl'd if supportsOrd() returns true!
+    public abstract long seek(long ord) throws IOException;
+    public abstract long ord();
+  }
+}

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java Thu Jan 13 02:09:33 2011
@@ -32,7 +32,7 @@ public abstract class TermsIndexWriterBa
     public abstract void finish() throws IOException;
   }
 
-  public abstract FieldWriter addField(FieldInfo fieldInfo);
+  public abstract FieldWriter addField(FieldInfo fieldInfo) throws IOException;
 
   public abstract void close() throws IOException;
-}
\ No newline at end of file
+}

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java Thu Jan 13 02:09:33 2011
@@ -230,8 +230,11 @@ public final class PulsingPostingsWriter
   }
 
   @Override
-  public void finishDoc() {
+  public void finishDoc() throws IOException {
     assert omitTF || currentDoc.numPositions == currentDoc.termDocFreq;
+    if (pulsed) {
+      wrappedPostingsWriter.finishDoc();
+    }
   }
 
   boolean pendingIsIndexTerm;

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java Thu Jan 13 02:09:33 2011
@@ -699,6 +699,7 @@ public class SepPostingsReaderImpl exten
 
       final int code = nextPosInt();
 
+      assert code >= 0;
       if (storePayloads) {
         if ((code & 1) != 0) {
           // Payload length has changed

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java Thu Jan 13 02:09:33 2011
@@ -85,24 +85,20 @@ public final class SepPostingsWriterImpl
     super();
 
     final String docFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, DOC_EXTENSION);
-    state.flushedFiles.add(docFileName);
     docOut = factory.createOutput(state.directory, docFileName);
     docIndex = docOut.index();
 
     if (state.fieldInfos.hasProx()) {
       final String frqFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, FREQ_EXTENSION);
-      state.flushedFiles.add(frqFileName);
       freqOut = factory.createOutput(state.directory, frqFileName);
       freqIndex = freqOut.index();
 
       final String posFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, POS_EXTENSION);
       posOut = factory.createOutput(state.directory, posFileName);
-      state.flushedFiles.add(posFileName);
       posIndex = posOut.index();
 
       // TODO: -- only if at least one field stores payloads?
       final String payloadFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, PAYLOAD_EXTENSION);
-      state.flushedFiles.add(payloadFileName);
       payloadOut = state.directory.createOutput(payloadFileName);
 
     } else {
@@ -114,7 +110,6 @@ public final class SepPostingsWriterImpl
     }
 
     final String skipFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, SKIP_EXTENSION);
-    state.flushedFiles.add(skipFileName);
     skipOut = state.directory.createOutput(skipFileName);
 
     totalNumDocs = state.numDocs;
@@ -216,6 +211,7 @@ public final class SepPostingsWriterImpl
     assert !omitTF;
 
     final int delta = position - lastPosition;
+    assert delta > 0 || position == 0: "position=" + position + " lastPosition=" + lastPosition;            // not quite right (if pos=0 is repeated twice we don't catch it)
     lastPosition = position;
 
     if (storePayloads) {

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java Thu Jan 13 02:09:33 2011
@@ -131,9 +131,8 @@ class SimpleTextFieldsReader extends Fie
 
     public SeekStatus seek(BytesRef text, boolean useCache /* ignored */) throws IOException {
 
-      fstEnum.reset();
       //System.out.println("seek to text=" + text.utf8ToString());
-      final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,Long>> result = fstEnum.advance(text);
+      final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,Long>> result = fstEnum.seekCeil(text);
       if (result == null) {
         //System.out.println("  end");
         return SeekStatus.END;

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java Thu Jan 13 02:09:33 2011
@@ -46,7 +46,6 @@ class SimpleTextFieldsWriter extends Fie
   public SimpleTextFieldsWriter(SegmentWriteState state) throws IOException {
     final String fileName = SimpleTextCodec.getPostingsFileName(state.segmentName, state.codecId);
     out = state.directory.createOutput(fileName);
-    state.flushedFiles.add(fileName);
   }
 
   private void write(String s) throws IOException {

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java Thu Jan 13 02:09:33 2011
@@ -31,8 +31,8 @@ import org.apache.lucene.index.codecs.Po
 import org.apache.lucene.index.codecs.PostingsReaderBase;
 import org.apache.lucene.index.codecs.TermsIndexWriterBase;
 import org.apache.lucene.index.codecs.TermsIndexReaderBase;
-import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter;
-import org.apache.lucene.index.codecs.FixedGapTermsIndexReader;
+import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter;
+import org.apache.lucene.index.codecs.VariableGapTermsIndexReader;
 import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
 import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
 import org.apache.lucene.store.Directory;
@@ -56,7 +56,7 @@ public class StandardCodec extends Codec
     TermsIndexWriterBase indexWriter;
     boolean success = false;
     try {
-      indexWriter = new FixedGapTermsIndexWriter(state);
+      indexWriter = new VariableGapTermsIndexWriter(state, new VariableGapTermsIndexWriter.EveryNTermSelector(state.termIndexInterval));
       success = true;
     } finally {
       if (!success) {
@@ -89,12 +89,11 @@ public class StandardCodec extends Codec
 
     boolean success = false;
     try {
-      indexReader = new FixedGapTermsIndexReader(state.dir,
-                                                       state.fieldInfos,
-                                                       state.segmentInfo.name,
-                                                       state.termsIndexDivisor,
-                                                       BytesRef.getUTF8SortedAsUnicodeComparator(),
-                                                       state.codecId);
+      indexReader = new VariableGapTermsIndexReader(state.dir,
+                                                    state.fieldInfos,
+                                                    state.segmentInfo.name,
+                                                    state.termsIndexDivisor,
+                                                    state.codecId);
       success = true;
     } finally {
       if (!success) {
@@ -136,7 +135,7 @@ public class StandardCodec extends Codec
   public void files(Directory dir, SegmentInfo segmentInfo, String id, Set<String> files) throws IOException {
     StandardPostingsReader.files(dir, segmentInfo, id, files);
     PrefixCodedTermsReader.files(dir, segmentInfo, id, files);
-    FixedGapTermsIndexReader.files(dir, segmentInfo, id, files);
+    VariableGapTermsIndexReader.files(dir, segmentInfo, id, files);
   }
 
   @Override
@@ -148,6 +147,6 @@ public class StandardCodec extends Codec
     extensions.add(FREQ_EXTENSION);
     extensions.add(PROX_EXTENSION);
     PrefixCodedTermsReader.getExtensions(extensions);
-    FixedGapTermsIndexReader.getIndexExtensions(extensions);
+    VariableGapTermsIndexReader.getIndexExtensions(extensions);
   }
 }

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java Thu Jan 13 02:09:33 2011
@@ -61,14 +61,12 @@ public final class StandardPostingsWrite
   public StandardPostingsWriter(SegmentWriteState state) throws IOException {
     super();
     String fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.FREQ_EXTENSION);
-    state.flushedFiles.add(fileName);
     freqOut = state.directory.createOutput(fileName);
 
     if (state.fieldInfos.hasProx()) {
       // At least one field does not omit TF, so create the
       // prox file
       fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.PROX_EXTENSION);
-      state.flushedFiles.add(fileName);
       proxOut = state.directory.createOutput(fileName);
     } else {
       // Every field omits TF so we will write no prox file
@@ -157,7 +155,7 @@ public final class StandardPostingsWrite
 
     final int delta = position - lastPosition;
     
-    assert delta > 0 || position == 0 || position == -1: "position=" + position + " lastPosition=" + lastPosition;            // not quite right (if pos=0 is repeated twice we don't catch it)
+    assert delta > 0 || position == 0: "position=" + position + " lastPosition=" + lastPosition;            // not quite right (if pos=0 is repeated twice we don't catch it)
 
     lastPosition = position;
 

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanQuery.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanQuery.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanQuery.java Thu Jan 13 02:09:33 2011
@@ -18,6 +18,7 @@ package org.apache.lucene.search;
  */
 
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.util.ToStringUtils;
 import org.apache.lucene.search.BooleanClause.Occur;
@@ -62,10 +63,12 @@ public class BooleanQuery extends Query 
   }
 
   private ArrayList<BooleanClause> clauses = new ArrayList<BooleanClause>();
-  private boolean disableCoord;
+  private final boolean disableCoord;
 
   /** Constructs an empty boolean query. */
-  public BooleanQuery() {}
+  public BooleanQuery() {
+    disableCoord = false;
+  }
 
   /** Constructs an empty boolean query.
    *
@@ -86,22 +89,6 @@ public class BooleanQuery extends Query 
    */
   public boolean isCoordDisabled() { return disableCoord; }
 
-  // Implement coord disabling.
-  // Inherit javadoc.
-  @Override
-  public Similarity getSimilarity(Searcher searcher) {
-    Similarity result = super.getSimilarity(searcher);
-    if (disableCoord) {                           // disable coord as requested
-      result = new SimilarityDelegator(result) {
-          @Override
-          public float coord(int overlap, int maxOverlap) {
-            return 1.0f;
-          }
-        };
-    }
-    return result;
-  }
-
   /**
    * Specifies a minimum number of the optional BooleanClauses
    * which must be satisfied.
@@ -178,10 +165,12 @@ public class BooleanQuery extends Query 
     protected Similarity similarity;
     protected ArrayList<Weight> weights;
     protected int maxCoord;  // num optional + num required
+    private final boolean disableCoord;
 
-    public BooleanWeight(Searcher searcher)
+    public BooleanWeight(IndexSearcher searcher, boolean disableCoord)
       throws IOException {
       this.similarity = getSimilarity(searcher);
+      this.disableCoord = disableCoord;
       weights = new ArrayList<Weight>(clauses.size());
       for (int i = 0 ; i < clauses.size(); i++) {
         BooleanClause c = clauses.get(i);
@@ -223,7 +212,7 @@ public class BooleanQuery extends Query 
     }
 
     @Override
-    public Explanation explain(IndexReader reader, int doc)
+    public Explanation explain(AtomicReaderContext context, int doc)
       throws IOException {
       final int minShouldMatch =
         BooleanQuery.this.getMinimumNumberShouldMatch();
@@ -237,7 +226,7 @@ public class BooleanQuery extends Query 
       for (Iterator<Weight> wIter = weights.iterator(); wIter.hasNext();) {
         Weight w = wIter.next();
         BooleanClause c = cIter.next();
-        if (w.scorer(reader, true, true) == null) {
+        if (w.scorer(context, true, true) == null) {
           if (c.isRequired()) {
             fail = true;
             Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");
@@ -245,7 +234,7 @@ public class BooleanQuery extends Query 
           }
           continue;
         }
-        Explanation e = w.explain(reader, doc);
+        Explanation e = w.explain(context, doc);
         if (e.isMatch()) {
           if (!c.isProhibited()) {
             sumExpl.addDetail(e);
@@ -284,10 +273,10 @@ public class BooleanQuery extends Query 
       sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE);
       sumExpl.setValue(sum);
       
-      float coordFactor = similarity.coord(coord, maxCoord);
-      if (coordFactor == 1.0f)                      // coord is no-op
+      final float coordFactor = disableCoord ? 1.0f : similarity.coord(coord, maxCoord);
+      if (coordFactor == 1.0f) {
         return sumExpl;                             // eliminate wrapper
-      else {
+      } else {
         ComplexExplanation result = new ComplexExplanation(sumExpl.isMatch(),
                                                            sum*coordFactor,
                                                            "product of:");
@@ -299,7 +288,7 @@ public class BooleanQuery extends Query 
     }
 
     @Override
-    public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer)
+    public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer)
         throws IOException {
       List<Scorer> required = new ArrayList<Scorer>();
       List<Scorer> prohibited = new ArrayList<Scorer>();
@@ -307,7 +296,7 @@ public class BooleanQuery extends Query 
       Iterator<BooleanClause> cIter = clauses.iterator();
       for (Weight w  : weights) {
         BooleanClause c =  cIter.next();
-        Scorer subScorer = w.scorer(reader, true, false);
+        Scorer subScorer = w.scorer(context, true, false);
         if (subScorer == null) {
           if (c.isRequired()) {
             return null;
@@ -323,7 +312,7 @@ public class BooleanQuery extends Query 
       
       // Check if we can return a BooleanScorer
       if (!scoreDocsInOrder && topScorer && required.size() == 0 && prohibited.size() < 32) {
-        return new BooleanScorer(this, similarity, minNrShouldMatch, optional, prohibited, maxCoord);
+        return new BooleanScorer(this, disableCoord, similarity, minNrShouldMatch, optional, prohibited, maxCoord);
       }
       
       if (required.size() == 0 && optional.size() == 0) {
@@ -337,7 +326,7 @@ public class BooleanQuery extends Query 
       }
       
       // Return a BooleanScorer2
-      return new BooleanScorer2(this, similarity, minNrShouldMatch, required, prohibited, optional, maxCoord);
+      return new BooleanScorer2(this, disableCoord, similarity, minNrShouldMatch, required, prohibited, optional, maxCoord);
     }
     
     @Override
@@ -362,8 +351,8 @@ public class BooleanQuery extends Query 
   }
 
   @Override
-  public Weight createWeight(Searcher searcher) throws IOException {
-    return new BooleanWeight(searcher);
+  public Weight createWeight(IndexSearcher searcher) throws IOException {
+    return new BooleanWeight(searcher, disableCoord);
   }
 
   @Override

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer.java Thu Jan 13 02:09:33 2011
@@ -197,9 +197,9 @@ final class BooleanScorer extends Scorer
   private Bucket current;
   private int doc = -1;
   
-  BooleanScorer(Weight weight, Similarity similarity, int minNrShouldMatch,
+  BooleanScorer(Weight weight, boolean disableCoord, Similarity similarity, int minNrShouldMatch,
       List<Scorer> optionalScorers, List<Scorer> prohibitedScorers, int maxCoord) throws IOException {
-    super(similarity, weight);
+    super(null, weight);   // Similarity not used
     this.minNrShouldMatch = minNrShouldMatch;
 
     if (optionalScorers != null && optionalScorers.size() > 0) {
@@ -222,15 +222,14 @@ final class BooleanScorer extends Scorer
     }
 
     coordFactors = new float[optionalScorers.size() + 1];
-    Similarity sim = getSimilarity();
     for (int i = 0; i < coordFactors.length; i++) {
-      coordFactors[i] = sim.coord(i, maxCoord); 
+      coordFactors[i] = disableCoord ? 1.0f : similarity.coord(i, maxCoord); 
     }
   }
 
   // firstDocID is ignored since nextDoc() initializes 'current'
   @Override
-  protected boolean score(Collector collector, int max, int firstDocID) throws IOException {
+  public boolean score(Collector collector, int max, int firstDocID) throws IOException {
     boolean more;
     Bucket tmp;
     BucketScorer bs = new BucketScorer();

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java Thu Jan 13 02:09:33 2011
@@ -42,14 +42,12 @@ class BooleanScorer2 extends Scorer {
     int maxCoord = 0; // to be increased for each non prohibited scorer
     int nrMatchers; // to be increased by score() of match counting scorers.
     
-    void init() { // use after all scorers have been added.
+    void init(Similarity sim, boolean disableCoord) { // use after all scorers have been added.
       coordFactors = new float[optionalScorers.size() + requiredScorers.size() + 1];
-      Similarity sim = getSimilarity();
       for (int i = 0; i < coordFactors.length; i++) {
-        coordFactors[i] = sim.coord(i, maxCoord);
+        coordFactors[i] = disableCoord ? 1.0f : sim.coord(i, maxCoord);
       }
     }
-    
   }
 
   private final Coordinator coordinator;
@@ -82,9 +80,9 @@ class BooleanScorer2 extends Scorer {
    * @param optional
    *          the list of optional scorers.
    */
-  public BooleanScorer2(Weight weight, Similarity similarity, int minNrShouldMatch,
+  public BooleanScorer2(Weight weight, boolean disableCoord, Similarity similarity, int minNrShouldMatch,
       List<Scorer> required, List<Scorer> prohibited, List<Scorer> optional, int maxCoord) throws IOException {
-    super(similarity, weight);
+    super(null, weight);   // Similarity not used
     if (minNrShouldMatch < 0) {
       throw new IllegalArgumentException("Minimum number of optional scorers should not be negative");
     }
@@ -96,8 +94,8 @@ class BooleanScorer2 extends Scorer {
     requiredScorers = required;    
     prohibitedScorers = prohibited;
     
-    coordinator.init();
-    countingSumScorer = makeCountingSumScorer();
+    coordinator.init(similarity, disableCoord);
+    countingSumScorer = makeCountingSumScorer(disableCoord, similarity);
   }
   
   /** Count a scorer as a single match. */
@@ -109,7 +107,7 @@ class BooleanScorer2 extends Scorer {
     private float lastDocScore = Float.NaN;
 
     SingleMatchScorer(Scorer scorer) {
-      super(scorer.getSimilarity());
+      super(null); // No similarity used.
       this.scorer = scorer;
     }
 
@@ -164,12 +162,12 @@ class BooleanScorer2 extends Scorer {
     };
   }
 
-  private static final Similarity defaultSimilarity = Similarity.getDefault();
-
-  private Scorer countingConjunctionSumScorer(List<Scorer> requiredScorers) throws IOException {
+  private Scorer countingConjunctionSumScorer(boolean disableCoord,
+                                              Similarity similarity,
+                                              List<Scorer> requiredScorers) throws IOException {
     // each scorer from the list counted as a single matcher
     final int requiredNrMatchers = requiredScorers.size();
-    return new ConjunctionScorer(defaultSimilarity, requiredScorers) {
+    return new ConjunctionScorer(disableCoord ? 1.0f : similarity.coord(requiredScorers.size(), requiredScorers.size()), requiredScorers) {
       private int lastScoredDoc = -1;
       // Save the score of lastScoredDoc, so that we don't compute it more than
       // once in score().
@@ -192,8 +190,10 @@ class BooleanScorer2 extends Scorer {
     };
   }
 
-  private Scorer dualConjunctionSumScorer(Scorer req1, Scorer req2) throws IOException { // non counting.
-    return new ConjunctionScorer(defaultSimilarity, req1, req2);
+  private Scorer dualConjunctionSumScorer(boolean disableCoord,
+                                          Similarity similarity,
+                                          Scorer req1, Scorer req2) throws IOException { // non counting.
+    return new ConjunctionScorer(disableCoord ? 1.0f : similarity.coord(2, 2), req1, req2);
     // All scorers match, so defaultSimilarity always has 1 as
     // the coordination factor.
     // Therefore the sum of the scores of two scorers
@@ -203,13 +203,14 @@ class BooleanScorer2 extends Scorer {
   /** Returns the scorer to be used for match counting and score summing.
    * Uses requiredScorers, optionalScorers and prohibitedScorers.
    */
-  private Scorer makeCountingSumScorer() throws IOException { // each scorer counted as a single matcher
+  private Scorer makeCountingSumScorer(boolean disableCoord,
+                                       Similarity similarity) throws IOException { // each scorer counted as a single matcher
     return (requiredScorers.size() == 0)
-          ? makeCountingSumScorerNoReq()
-          : makeCountingSumScorerSomeReq();
+      ? makeCountingSumScorerNoReq(disableCoord, similarity)
+      : makeCountingSumScorerSomeReq(disableCoord, similarity);
   }
 
-  private Scorer makeCountingSumScorerNoReq() throws IOException { // No required scorers
+  private Scorer makeCountingSumScorerNoReq(boolean disableCoord, Similarity similarity) throws IOException { // No required scorers
     // minNrShouldMatch optional scorers are required, but at least 1
     int nrOptRequired = (minNrShouldMatch < 1) ? 1 : minNrShouldMatch;
     Scorer requiredCountingSumScorer;
@@ -217,24 +218,27 @@ class BooleanScorer2 extends Scorer {
       requiredCountingSumScorer = countingDisjunctionSumScorer(optionalScorers, nrOptRequired);
     else if (optionalScorers.size() == 1)
       requiredCountingSumScorer = new SingleMatchScorer(optionalScorers.get(0));
-    else
-      requiredCountingSumScorer = countingConjunctionSumScorer(optionalScorers);
+    else {
+      requiredCountingSumScorer = countingConjunctionSumScorer(disableCoord, similarity, optionalScorers);
+    }
     return addProhibitedScorers(requiredCountingSumScorer);
   }
 
-  private Scorer makeCountingSumScorerSomeReq() throws IOException { // At least one required scorer.
+  private Scorer makeCountingSumScorerSomeReq(boolean disableCoord, Similarity similarity) throws IOException { // At least one required scorer.
     if (optionalScorers.size() == minNrShouldMatch) { // all optional scorers also required.
       ArrayList<Scorer> allReq = new ArrayList<Scorer>(requiredScorers);
       allReq.addAll(optionalScorers);
-      return addProhibitedScorers(countingConjunctionSumScorer(allReq));
+      return addProhibitedScorers(countingConjunctionSumScorer(disableCoord, similarity, allReq));
     } else { // optionalScorers.size() > minNrShouldMatch, and at least one required scorer
       Scorer requiredCountingSumScorer =
             requiredScorers.size() == 1
             ? new SingleMatchScorer(requiredScorers.get(0))
-            : countingConjunctionSumScorer(requiredScorers);
+            : countingConjunctionSumScorer(disableCoord, similarity, requiredScorers);
       if (minNrShouldMatch > 0) { // use a required disjunction scorer over the optional scorers
         return addProhibitedScorers( 
                       dualConjunctionSumScorer( // non counting
+                              disableCoord,
+                              similarity,
                               requiredCountingSumScorer,
                               countingDisjunctionSumScorer(
                                       optionalScorers,
@@ -276,7 +280,7 @@ class BooleanScorer2 extends Scorer {
   }
   
   @Override
-  protected boolean score(Collector collector, int max, int firstDocID) throws IOException {
+  public boolean score(Collector collector, int max, int firstDocID) throws IOException {
     doc = firstDocID;
     collector.setScorer(this);
     while (doc < max) {

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BoostAttribute.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BoostAttribute.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BoostAttribute.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BoostAttribute.java Thu Jan 13 02:09:33 2011
@@ -21,7 +21,7 @@ import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeSource; // javadocs only
 import org.apache.lucene.index.TermsEnum; // javadocs only
 
-/** Add this {@link Attribute} to a {@link TermsEnum} returned by {@link MultiTermQuery#getTermsEnum(IndexReader,AttributeSource)}
+/** Add this {@link Attribute} to a {@link TermsEnum} returned by {@link MultiTermQuery#getTermsEnum(Terms,AttributeSource)}
  * and update the boost on each returned term. This enables to control the boost factor
  * for each matching term in {@link MultiTermQuery#SCORING_BOOLEAN_QUERY_REWRITE} or
  * {@link TopTermsRewrite} mode.

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java Thu Jan 13 02:09:33 2011
@@ -17,7 +17,7 @@ package org.apache.lucene.search;
 
 
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.util.Bits;
 
 import java.io.IOException;
@@ -61,8 +61,8 @@ public class CachingSpanFilter extends S
   }
 
   @Override
-  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
-    SpanFilterResult result = getCachedResult(reader);
+  public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+    SpanFilterResult result = getCachedResult(context.reader);
     return result != null ? result.getDocIdSet() : null;
   }
   
@@ -72,7 +72,7 @@ public class CachingSpanFilter extends S
   private SpanFilterResult getCachedResult(IndexReader reader) throws IOException {
 
     final Object coreKey = reader.getCoreCacheKey();
-    final Object delCoreKey = reader.hasDeletions() ? MultiFields.getDeletedDocs(reader) : coreKey;
+    final Object delCoreKey = reader.hasDeletions() ? reader.getDeletedDocs() : coreKey;
 
     SpanFilterResult result = cache.get(reader, coreKey, delCoreKey);
     if (result != null) {

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java Thu Jan 13 02:09:33 2011
@@ -23,7 +23,7 @@ import java.util.Map;
 import java.util.WeakHashMap;
 
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.util.OpenBitSetDISI;
 import org.apache.lucene.util.Bits;
 
@@ -38,6 +38,9 @@ import org.apache.lucene.util.Bits;
  * {@link DeletesMode#DYNAMIC}).
  */
 public class CachingWrapperFilter extends Filter {
+  // TODO: make this filter aware of ReaderContext. a cached filter could 
+  // specify the actual readers key or something similar to indicate on which
+  // level of the readers hierarchy it should be cached.
   Filter filter;
 
   /**
@@ -105,7 +108,7 @@ public class CachingWrapperFilter extend
           // deletions
           value = cache.get(coreKey);
           if (value != null) {
-            final Bits delDocs = MultiFields.getDeletedDocs(reader);
+            final Bits delDocs = reader.getDeletedDocs();
             if (delDocs != null) {
               value = mergeDeletes(delDocs, value);
             }
@@ -192,10 +195,10 @@ public class CachingWrapperFilter extend
   int hitCount, missCount;
 
   @Override
-  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
-
+  public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+    final IndexReader reader = context.reader;
     final Object coreKey = reader.getCoreCacheKey();
-    final Object delCoreKey = reader.hasDeletions() ? MultiFields.getDeletedDocs(reader) : coreKey;
+    final Object delCoreKey = reader.hasDeletions() ? reader.getDeletedDocs() : coreKey;
 
     DocIdSet docIdSet = cache.get(reader, coreKey, delCoreKey);
     if (docIdSet != null) {
@@ -206,7 +209,7 @@ public class CachingWrapperFilter extend
     missCount++;
 
     // cache miss
-    docIdSet = docIdSetToCache(filter.getDocIdSet(reader), reader);
+    docIdSet = docIdSetToCache(filter.getDocIdSet(context), reader);
 
     if (docIdSet != null) {
       cache.put(coreKey, delCoreKey, docIdSet);

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/Collector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/Collector.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/Collector.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/Collector.java Thu Jan 13 02:09:33 2011
@@ -136,7 +136,7 @@ public abstract class Collector {
    * 
    * <p>
    * Note: This is called in an inner search loop. For good search performance,
-   * implementations of this method should not call {@link Searcher#doc(int)} or
+   * implementations of this method should not call {@link IndexSearcher#doc(int)} or
    * {@link org.apache.lucene.index.IndexReader#document(int)} on every hit.
    * Doing so can slow searches by an order of magnitude or more.
    */

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java Thu Jan 13 02:09:33 2011
@@ -29,14 +29,14 @@ class ConjunctionScorer extends Scorer {
   private final float coord;
   private int lastDoc = -1;
 
-  public ConjunctionScorer(Similarity similarity, Collection<Scorer> scorers) throws IOException {
-    this(similarity, scorers.toArray(new Scorer[scorers.size()]));
+  public ConjunctionScorer(float coord, Collection<Scorer> scorers) throws IOException {
+    this(coord, scorers.toArray(new Scorer[scorers.size()]));
   }
 
-  public ConjunctionScorer(Similarity similarity, Scorer... scorers) throws IOException {
-    super(similarity);
+  public ConjunctionScorer(float coord, Scorer... scorers) throws IOException {
+    super(null);
     this.scorers = scorers;
-    coord = similarity.coord(scorers.length, scorers.length);
+    this.coord = coord;
     
     for (int i = 0; i < scorers.length; i++) {
       if (scorers[i].nextDoc() == NO_MORE_DOCS) {

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java Thu Jan 13 02:09:33 2011
@@ -103,7 +103,7 @@ class ConstantScoreAutoRewrite extends T
         addClause(bq, placeholderTerm.createTerm(pendingTerms.get(sort[i], new BytesRef())), 1, 1.0f);
       }
       // Strip scores
-      final Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq));
+      final Query result = new ConstantScoreQuery(bq);
       result.setBoost(query.getBoost());
       query.incTotalNumberOfTerms(size);
       return result;



Mime
View raw message