lucene-java-commits mailing list archives

From mikemcc...@apache.org
Subject svn commit: r824918 [6/11] - in /lucene/java/branches/flex_1458: contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/ contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/ contrib/benchmark/src/test/org/apache/lucene/benc...
Date Tue, 13 Oct 2009 20:44:59 GMT
Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,310 @@
+package org.apache.lucene.index.codecs.preflex;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.CloseableThreadLocal;
+import org.apache.lucene.util.cache.Cache;
+import org.apache.lucene.util.cache.SimpleLRUCache;
+
+/** This stores a monotonically increasing set of <Term, TermInfo> pairs in a
+ * Directory.  Pairs are accessed either by Term or by ordinal position in
+ * the set.
+ * @deprecated This class has been replaced by
+ * FormatPostingsTermsDictReader, except for reading old segments. */
+// nocommit -- public
+public final class TermInfosReader {
+  private final Directory directory;
+  private final String segment;
+  private final FieldInfos fieldInfos;
+
+  private final CloseableThreadLocal threadResources = new CloseableThreadLocal();
+  private final SegmentTermEnum origEnum;
+  private final long size;
+
+  private final Term[] indexTerms;
+  private final TermInfo[] indexInfos;
+  private final long[] indexPointers;
+  
+  private final int totalIndexInterval;
+
+  private final static int DEFAULT_CACHE_SIZE = 1024;
+  
+  /**
+   * Per-thread resources managed by ThreadLocal
+   */
+  private static final class ThreadResources {
+    SegmentTermEnum termEnum;
+    
+    // Used for caching the most recently looked-up Terms
+    Cache termInfoCache;
+  }
+  
+  TermInfosReader(Directory dir, String seg, FieldInfos fis, int readBufferSize, int indexDivisor)
+       throws CorruptIndexException, IOException {
+    boolean success = false;
+
+    if (indexDivisor < 1 && indexDivisor != -1) {
+      throw new IllegalArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
+    }
+
+    try {
+      directory = dir;
+      segment = seg;
+      fieldInfos = fis;
+
+      origEnum = new SegmentTermEnum(directory.openInput(segment + "." + PreFlexCodec.TERMS_EXTENSION,
+          readBufferSize), fieldInfos, false);
+      size = origEnum.size;
+
+      if (indexDivisor != -1) {
+        // Load terms index
+        totalIndexInterval = origEnum.indexInterval * indexDivisor;
+        final SegmentTermEnum indexEnum = new SegmentTermEnum(directory.openInput(segment + "." + PreFlexCodec.TERMS_INDEX_EXTENSION,
+                                                                                  readBufferSize), fieldInfos, true);
+
+        try {
+          int indexSize = 1+((int)indexEnum.size-1)/indexDivisor;  // number of index entries we will retain
+
+          indexTerms = new Term[indexSize];
+          indexInfos = new TermInfo[indexSize];
+          indexPointers = new long[indexSize];
+        
+          for (int i = 0; indexEnum.next(); i++) {
+            indexTerms[i] = indexEnum.term();
+            indexInfos[i] = indexEnum.termInfo();
+            indexPointers[i] = indexEnum.indexPointer;
+        
+            for (int j = 1; j < indexDivisor; j++)
+              if (!indexEnum.next())
+                break;
+          }
+        } finally {
+          indexEnum.close();
+        }
+      } else {
+        // Do not load terms index:
+        totalIndexInterval = -1;
+        indexTerms = null;
+        indexInfos = null;
+        indexPointers = null;
+      }
+      success = true;
+    } finally {
+      // With lock-less commits, it's entirely possible (and
+      // fine) to hit a FileNotFound exception above. In
+      // this case, we want to explicitly close any subset
+      // of things that were opened so that we don't have to
+      // wait for a GC to do so.
+      if (!success) {
+        close();
+      }
+    }
+  }
+
+  public int getSkipInterval() {
+    return origEnum.skipInterval;
+  }
+  
+  public int getMaxSkipLevels() {
+    return origEnum.maxSkipLevels;
+  }
+
+  final void close() throws IOException {
+    if (origEnum != null)
+      origEnum.close();
+    threadResources.close();
+  }
+
+  /** Returns the number of term/value pairs in the set. */
+  final long size() {
+    return size;
+  }
+
+  private ThreadResources getThreadResources() {
+    ThreadResources resources = (ThreadResources)threadResources.get();
+    if (resources == null) {
+      resources = new ThreadResources();
+      resources.termEnum = terms();
+      // Cache does not have to be thread-safe: it is only used by one thread at a time
+      resources.termInfoCache = new SimpleLRUCache(DEFAULT_CACHE_SIZE);
+      threadResources.set(resources);
+    }
+    return resources;
+  }
+
+
+  /** Returns the offset of the greatest index entry which is less than or equal to term. */
+  private final int getIndexOffset(Term term) {
+    int lo = 0;                                   // binary search indexTerms[]
+    int hi = indexTerms.length - 1;
+
+    while (hi >= lo) {
+      int mid = (lo + hi) >>> 1;
+      int delta = term.compareTo(indexTerms[mid]);
+      if (delta < 0)
+        hi = mid - 1;
+      else if (delta > 0)
+        lo = mid + 1;
+      else
+        return mid;
+    }
+    return hi;
+  }
+
+  private final void seekEnum(SegmentTermEnum enumerator, int indexOffset) throws IOException {
+    enumerator.seek(indexPointers[indexOffset],
+                   (indexOffset * totalIndexInterval) - 1,
+                   indexTerms[indexOffset], indexInfos[indexOffset]);
+  }
+
+  /** Returns the TermInfo for a Term in the set, or null. */
+  TermInfo get(Term term) throws IOException {
+    return get(term, true);
+  }
+  
+  /** Returns the TermInfo for a Term in the set, or null. */
+  private TermInfo get(Term term, boolean useCache) throws IOException {
+    if (size == 0) return null;
+
+    ensureIndexIsRead();
+
+    TermInfo ti;
+    ThreadResources resources = getThreadResources();
+    Cache cache = null;
+    
+    if (useCache) {
+      cache = resources.termInfoCache;
+      // check the cache first if the term was recently looked up
+      ti = (TermInfo) cache.get(term);
+      if (ti != null) {
+        return ti;
+      }
+    }
+
+    // nocommit -- make sure these optimizations survive
+    // into flex 
+
+    // optimize sequential access: first try scanning cached enum w/o seeking
+    SegmentTermEnum enumerator = resources.termEnum;
+    if (enumerator.term() != null                 // term is at or past current
+        && ((enumerator.prev() != null && term.compareTo(enumerator.prev()) > 0)
+            || term.compareTo(enumerator.term()) >= 0)) {
+      int enumOffset = (int)(enumerator.position/totalIndexInterval)+1;
+      if (indexTerms.length == enumOffset         // but before end of block
+          || term.compareTo(indexTerms[enumOffset]) < 0) {
+        // no need to seek
+
+        int numScans = enumerator.scanTo(term);
+        if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0) {
+          ti = enumerator.termInfo();
+          if (cache != null && numScans > 1) {
+            // we only want to put this TermInfo into the cache if
+            // scanEnum skipped more than one dictionary entry.
+            // This prevents RangeQueries or WildcardQueries from
+            // wiping out the cache when they iterate over a large
+            // number of terms in order
+            cache.put(term, ti);
+          }
+        } else {
+          ti = null;
+        }
+
+        return ti;
+      }  
+    }
+
+    // random-access: must seek
+    seekEnum(enumerator, getIndexOffset(term));
+    enumerator.scanTo(term);
+    if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0) {
+      ti = enumerator.termInfo();
+      if (cache != null) {
+        cache.put(term, ti);
+      }
+    } else {
+      ti = null;
+    }
+    return ti;
+  }
+
+  /** Returns the nth term in the set. */
+  final Term get(int position) throws IOException {
+    if (size == 0) return null;
+
+    SegmentTermEnum enumerator = getThreadResources().termEnum;
+    if (enumerator.term() != null &&
+        position >= enumerator.position &&
+        position < (enumerator.position + totalIndexInterval))
+      return scanEnum(enumerator, position);      // can avoid seek
+
+    seekEnum(enumerator, position/totalIndexInterval); // must seek
+    return scanEnum(enumerator, position);
+  }
+
+  private final Term scanEnum(SegmentTermEnum enumerator, int position) throws IOException {
+    while(enumerator.position < position)
+      if (!enumerator.next())
+        return null;
+
+    return enumerator.term();
+  }
+
+  private void ensureIndexIsRead() {
+    if (indexTerms == null) {
+      throw new IllegalStateException("terms index was not loaded when this reader was created");
+    }
+  }
+
+  /** Returns the position of a Term in the set or -1. */
+  final long getPosition(Term term) throws IOException {
+    if (size == 0) return -1;
+
+    ensureIndexIsRead();
+    int indexOffset = getIndexOffset(term);
+    
+    SegmentTermEnum enumerator = getThreadResources().termEnum;
+    seekEnum(enumerator, indexOffset);
+
+    while(term.compareTo(enumerator.term()) > 0 && enumerator.next()) {}
+
+    if (term.compareTo(enumerator.term()) == 0)
+      return enumerator.position;
+    else
+      return -1;
+  }
+
+  /** Returns an enumeration of all the Terms and TermInfos in the set. */
+  public SegmentTermEnum terms() {
+    return (SegmentTermEnum) origEnum.clone();
+  }
+
+  /** Returns an enumeration of terms starting at or after the named term. */
+  public SegmentTermEnum terms(Term term) throws IOException {
+    // don't use the cache in this call because we want to reposition the
+    // enumeration
+    get(term, false);
+    return (SegmentTermEnum)getThreadResources().termEnum.clone();
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java
------------------------------------------------------------------------------
    svn:eol-style = native
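
For orientation, a minimal standalone sketch (not part of this commit) of the lookup strategy TermInfosReader uses: binary-search the in-memory sampled index for the greatest entry less than or equal to the target, then scan the primary enum forward from there. The class name and String keys below are illustrative stand-ins for Term and indexTerms[]:

public class IndexedLookupSketch {
  // sorted "index terms", sampled every indexInterval entries of the full set
  static final String[] indexTerms = { "apple", "delta", "kilo", "tango" };

  /** Returns the offset of the greatest index entry <= key, or -1 if key
   *  precedes all entries (mirrors TermInfosReader.getIndexOffset). */
  static int getIndexOffset(String key) {
    int lo = 0, hi = indexTerms.length - 1;
    while (hi >= lo) {
      int mid = (lo + hi) >>> 1;
      int delta = key.compareTo(indexTerms[mid]);
      if (delta < 0) hi = mid - 1;
      else if (delta > 0) lo = mid + 1;
      else return mid;
    }
    return hi;  // once hi < lo, hi is the last entry still <= key
  }

  public static void main(String[] args) {
    System.out.println(getIndexOffset("echo"));   // 1: "delta" <= "echo" < "kilo"
    System.out.println(getIndexOffset("apple"));  // 0: exact hit
    System.out.println(getIndexOffset("aaa"));    // -1: before all entries
  }
}

Returning hi after the loop is the key trick: when the search misses, hi has just crossed below lo and points at the last entry that still compares <= the key, which is exactly the block the enum must scan from.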

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,146 @@
+package org.apache.lucene.index.codecs.pulsing;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.DocsConsumer;
+import org.apache.lucene.index.codecs.DocsProducer;
+import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.codecs.standard.SimpleStandardTermsIndexReader;
+import org.apache.lucene.index.codecs.standard.SimpleStandardTermsIndexWriter;
+import org.apache.lucene.index.codecs.standard.StandardCodec;
+import org.apache.lucene.index.codecs.standard.StandardDocsReader;
+import org.apache.lucene.index.codecs.standard.StandardDocsWriter;
+import org.apache.lucene.index.codecs.standard.StandardTermsDictReader;
+import org.apache.lucene.index.codecs.standard.StandardTermsDictWriter;
+import org.apache.lucene.index.codecs.standard.StandardTermsIndexReader;
+import org.apache.lucene.index.codecs.standard.StandardTermsIndexWriter;
+import org.apache.lucene.store.Directory;
+
+/** This codec "inlines" the postings for terms that have
+ *  low docFreq.  It wraps another codec, which is used for
+ *  writing the non-inlined terms.
+ *
+ *  Currently it only inlines docFreq=1 terms, and
+ *  otherwise uses the normal "standard" codec. */
+
+public class PulsingCodec extends Codec {
+
+  public PulsingCodec() {
+    name = "Pulsing";
+  }
+
+  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+    // We wrap StandardDocsWriter, but any DocsConsumer
+    // will work:
+    DocsConsumer docsWriter = new StandardDocsWriter(state);
+
+    // Terms that have <= freqCutoff number of docs are
+    // "pulsed" (inlined):
+    final int freqCutoff = 1;
+    DocsConsumer pulsingWriter = new PulsingDocsWriter(state, freqCutoff, docsWriter);
+
+    // Terms dict index
+    StandardTermsIndexWriter indexWriter;
+    boolean success = false;
+    try {
+      indexWriter = new SimpleStandardTermsIndexWriter(state);
+      success = true;
+    } finally {
+      if (!success) {
+        pulsingWriter.close();
+      }
+    }
+
+    // Terms dict
+    success = false;
+    try {
+      FieldsConsumer ret = new StandardTermsDictWriter(indexWriter, state, pulsingWriter);
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        try {
+          pulsingWriter.close();
+        } finally {
+          indexWriter.close();
+        }
+      }
+    }
+  }
+
+  public FieldsProducer fieldsProducer(Directory dir, FieldInfos fieldInfos, SegmentInfo si, int readBufferSize, int indexDivisor) throws IOException {
+
+    // We wrap StandardDocsReader, but any DocsProducer
+    // will work:
+    DocsProducer docs = new StandardDocsReader(dir, si, readBufferSize);
+    DocsProducer docsReader = new PulsingDocsReader(dir, si, readBufferSize, docs);
+
+    // Terms dict index reader
+    StandardTermsIndexReader indexReader;
+
+    boolean success = false;
+    try {
+      indexReader = new SimpleStandardTermsIndexReader(dir,
+                                                       fieldInfos,
+                                                       si.name,
+                                                       indexDivisor);
+      success = true;
+    } finally {
+      if (!success) {
+        docs.close();
+      }
+    }
+
+    // Terms dict reader
+    success = false;
+    try {
+      FieldsProducer ret = new StandardTermsDictReader(indexReader,
+                                                       dir, fieldInfos, si.name,
+                                                       docsReader,
+                                                       readBufferSize);
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        try {
+          docs.close();
+        } finally {
+          indexReader.close();
+        }
+      }
+    }
+  }
+
+  public void files(Directory dir, SegmentInfo segmentInfo, Collection files) {
+    StandardDocsReader.files(segmentInfo, files);
+    StandardTermsDictReader.files(segmentInfo, files);
+    SimpleStandardTermsIndexReader.files(segmentInfo, files);
+  }
+
+  public void getExtensions(Collection extensions) {
+    StandardCodec.getStandardExtensions(extensions);
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java
------------------------------------------------------------------------------
    svn:eol-style = native
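
A note on the resource handling in fieldsConsumer/fieldsProducer above: the boolean-success idiom closes the earlier-opened resource only when a later constructor throws; on success, ownership passes to the returned object. A self-contained sketch (the Resource class here is hypothetical, purely to make it runnable):

import java.io.Closeable;
import java.io.IOException;

public class SuccessFlagSketch {
  static class Resource implements Closeable {
    final String name;
    Resource(String name) { this.name = name; }
    public void close() { System.out.println("closed " + name); }
  }

  static Resource openBoth() throws IOException {
    Resource first = new Resource("first");
    boolean success = false;
    try {
      // In the codec this is a second constructor that may throw;
      // on success, "first" is owned by the returned consumer.
      Resource second = new Resource("second");
      success = true;
      return second;
    } finally {
      if (!success) {
        first.close();  // clean up only on the failure path
      }
    }
  }

  public static void main(String[] args) throws IOException {
    openBoth();
  }
}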

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingDocsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingDocsReader.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingDocsReader.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingDocsReader.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,315 @@
+package org.apache.lucene.index.codecs.pulsing;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.PositionsEnum;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.DocsProducer;
+import org.apache.lucene.index.codecs.pulsing.PulsingDocsWriter.Document;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.Bits;
+
+/** Concrete class that reads postings pulsed (inlined) into the
+ *  terms dict, falling back to the wrapped producer for terms
+ *  that were not pulsed */
+
+// nocommit -- should we switch "hasProx" higher up?  and
+// create two separate docs readers, one that also reads
+// prox and one that doesn't?
+
+class PulsingDocsReader extends DocsProducer {
+
+  // Fallback reader for non-pulsed terms:
+  final DocsProducer wrappedDocsReader;
+  IndexInput termsIn;
+  int maxPulsingDocFreq;
+
+  PulsingDocsReader(Directory dir, SegmentInfo segmentInfo, int readBufferSize, DocsProducer wrappedDocsReader) throws IOException {
+    this.wrappedDocsReader = wrappedDocsReader;
+  }
+
+  public void start(IndexInput termsIn) throws IOException {
+    this.termsIn = termsIn;
+    Codec.checkHeader(termsIn, PulsingDocsWriter.CODEC, PulsingDocsWriter.VERSION_START);
+    maxPulsingDocFreq = termsIn.readVInt();
+    wrappedDocsReader.start(termsIn);
+  }
+
+  public Reader reader(FieldInfo fieldInfo, IndexInput termsIn) throws IOException {
+    return new PulsingReader(fieldInfo, termsIn, wrappedDocsReader.reader(fieldInfo, termsIn));
+  }
+
+  class PulsingReader extends Reader {
+
+    final IndexInput termsIn;
+    final FieldInfo fieldInfo;
+    final boolean omitTF;
+    final boolean storePayloads;
+    int docFreq;
+
+    // Holds pulsed docs
+    final Document[] docs;
+
+    private boolean pendingIndexTerm;
+    private final Reader wrappedReader;
+
+    PulsingReader(FieldInfo fieldInfo, IndexInput termsIn, Reader wrappedReader) {
+      this.termsIn = termsIn;                     // not cloned
+      this.fieldInfo = fieldInfo;
+      this.wrappedReader = wrappedReader;
+      omitTF = fieldInfo.omitTermFreqAndPositions;
+      storePayloads = fieldInfo.storePayloads;
+      docs = new Document[maxPulsingDocFreq];
+      for(int i=0;i<maxPulsingDocFreq;i++) {
+        docs[i] = new Document();
+      }
+    }
+
+    public void readTerm(int docFreq, boolean isIndexTerm) throws IOException {
+
+      if (Codec.DEBUG) {
+        System.out.println("pulsr.readTerm docFreq=" + docFreq + " indexTerm=" + isIndexTerm);
+      }
+
+      this.docFreq = docFreq;
+
+      pendingIndexTerm |= isIndexTerm;
+
+      if (docFreq <= maxPulsingDocFreq) {
+
+        if (Codec.DEBUG) {
+          System.out.println("  pulsed");
+        }
+
+        // Inlined into terms dict -- read everything in
+
+        // TODO: maybe only read everything in lazily?  But
+        // then we'd need to store length so we could seek
+        // over it when docs/pos enum was not requested
+
+        // TODO: it'd be better to share this encoding logic
+        // in some inner codec that knows how to write a
+        // single doc / single position, etc.  This way if a
+        // given codec wants to store other interesting
+        // stuff, it could use this pulsing code to do so
+        int docID = 0;
+        for(int i=0;i<docFreq;i++) {
+          final Document doc = docs[i];
+          final int code = termsIn.readVInt();
+          if (omitTF) {
+            docID += code;
+            doc.numPositions = 1;
+            if (Codec.DEBUG) {
+              System.out.println("  doc=" + docID);
+            }
+          } else {
+            docID += code>>>1;
+            if ((code & 1) != 0) {
+              doc.numPositions = 1;
+            } else {
+              doc.numPositions = termsIn.readVInt();
+            }
+            
+            if (Codec.DEBUG) {
+              System.out.println("  doc=" + docID + " numPos=" + doc.numPositions);
+            }
+
+            if (doc.numPositions > doc.positions.length) {
+              doc.reallocPositions(doc.numPositions);
+            }
+
+            int position = 0;
+            int payloadLength = -1;
+
+            for(int j=0;j<doc.numPositions;j++) {
+              final PulsingDocsWriter.Position pos = doc.positions[j];
+              final int code2 = termsIn.readVInt();
+              if (storePayloads) {
+                position += code2 >>> 1;
+                if ((code2 & 1) != 0)
+                  payloadLength = termsIn.readVInt();
+                if (payloadLength > 0) {
+                  if (pos.payload == null || payloadLength > pos.payload.length) {
+                    pos.payload = new byte[ArrayUtil.getNextSize(payloadLength)];
+                  }
+                  termsIn.readBytes(pos.payload, 0, payloadLength);
+                }
+              } else {
+                position += code2;
+              }
+              pos.pos = position;
+              pos.payloadLength = payloadLength;
+            }
+          }
+          doc.docID = docID;
+        }
+        
+      } else {
+        if (Codec.DEBUG) {
+          System.out.println("  not pulsed pass isIndex=" + pendingIndexTerm);
+        }
+        wrappedReader.readTerm(docFreq, pendingIndexTerm);
+        pendingIndexTerm = false;
+      }
+    }
+
+    final PulsingDocsEnum docsEnum = new PulsingDocsEnum();
+
+    public DocsEnum docs(Bits skipDocs) throws IOException {
+      if (docFreq <= maxPulsingDocFreq) {
+        docsEnum.reset(skipDocs);
+        return docsEnum;
+      } else {
+        return wrappedReader.docs(skipDocs);
+      }
+    }
+
+    class PulsingDocsEnum extends DocsEnum {
+      int nextRead;
+      private Bits skipDocs;
+      private Document doc;
+
+      public void close() {}
+
+      void reset(Bits skipDocs) {
+        this.skipDocs = skipDocs;
+        nextRead = 0;
+      }
+
+      public int next() {
+        while(true) {
+          if (nextRead >= docFreq) {
+            return NO_MORE_DOCS;
+          } else {
+            doc = docs[nextRead++];
+            if (skipDocs == null || !skipDocs.get(doc.docID)) {
+              return doc.docID;
+            }
+          }
+        }
+      }
+
+      public int read(int[] retDocs, int[] retFreqs) {
+        int i=0;
+        // nocommit -- ob1?
+        while(nextRead < docFreq && i < retDocs.length) {
+          doc = docs[nextRead++];
+          if (skipDocs == null || !skipDocs.get(doc.docID)) {
+            retDocs[i] = doc.docID;
+            if (omitTF)
+              retFreqs[i] = 0;
+            else
+              retFreqs[i] = doc.numPositions;
+            i++;
+          }
+        }
+        return i;
+      }
+
+      public int ord() {
+        assert nextRead <= docFreq;
+        return nextRead-1;
+      }
+
+      public int freq() {
+        return doc.numPositions;
+      }
+
+      class PulsingPositionsEnum extends PositionsEnum {
+        int nextRead;
+        PulsingDocsWriter.Position pos;
+
+        // nocommit -- this is only here to emulate how
+        // other codecs disallow retrieving the payload more
+        // than once
+        private boolean payloadRetrieved;
+
+        void reset() {
+          nextRead = 0;
+          payloadRetrieved = false;
+        }
+
+        public int next() {
+          assert nextRead < doc.numPositions;
+          pos = doc.positions[nextRead++];
+          payloadRetrieved = false;
+          return pos.pos;
+        }
+
+        public int getPayloadLength() {
+          return pos.payloadLength;
+        }
+
+        public boolean hasPayload() {
+          // nocommit -- maybe don't do the payloadRetrieved check?
+          return !payloadRetrieved && pos.payloadLength > 0;
+        }
+
+        public byte[] getPayload(byte[] data, int offset) {
+          // nocommit -- inefficient
+          if (!payloadRetrieved) {
+            payloadRetrieved = true;
+            System.arraycopy(pos.payload, 0, data, offset, pos.payloadLength);
+            return data;
+          } else {
+            return null;
+          }
+        }
+      }
+      
+      final PulsingPositionsEnum positions = new PulsingPositionsEnum();
+
+      public PositionsEnum positions() throws IOException {
+        positions.reset();
+        return positions;
+      }
+
+      public int advance(int target) throws IOException {
+        int doc;
+        while((doc=next()) != NO_MORE_DOCS) {
+          if (doc >= target)
+            return doc;
+        }
+        return NO_MORE_DOCS;
+      }
+    }
+
+    @Override
+    public State captureState(State reusableState) {
+      // TODO Auto-generated method stub
+      return null;
+    }
+
+    @Override
+    public void setState(State state) throws IOException {
+      // TODO Auto-generated method stub
+      
+    }
+  }
+
+  public void close() throws IOException {
+    wrappedDocsReader.close();
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingDocsReader.java
------------------------------------------------------------------------------
    svn:eol-style = native
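
To make the inlined format concrete, here is a hand-runnable walk-through (not part of the commit) of the per-doc encoding PulsingReader.readTerm decodes, for the storePayloads == false, omitTF == false case. VInts are modeled as a plain int[] stream and all names are illustrative:

public class PulsedDecodeSketch {
  public static void main(String[] args) {
    // Term with docFreq=2: doc 3 with positions {5, 9}, doc 7 with position {2}.
    // Writer emits delta<<1 followed by numPositions when freq > 1, and
    // delta<<1|1 when freq == 1; positions are delta-coded within each doc.
    int[] stream = { 3 << 1, 2, 5, 4,        // doc 3: numPos=2, positions 5, 5+4=9
                     (7 - 3) << 1 | 1, 2 };  // doc 7: low bit set => single position 2
    int p = 0, docID = 0;
    for (int i = 0; i < 2; i++) {            // docFreq == 2
      int code = stream[p++];
      docID += code >>> 1;
      int numPositions = ((code & 1) != 0) ? 1 : stream[p++];
      int position = 0;
      System.out.print("doc=" + docID + " positions=");
      for (int j = 0; j < numPositions; j++) {
        position += stream[p++];             // position delta
        System.out.print(position + " ");
      }
      System.out.println();
    }
    // prints: doc=3 positions=5 9   then   doc=7 positions=2
  }
}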

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingDocsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingDocsWriter.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingDocsWriter.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingDocsWriter.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,290 @@
+package org.apache.lucene.index.codecs.pulsing;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.ArrayUtil;
+
+import org.apache.lucene.index.codecs.DocsConsumer;
+import org.apache.lucene.index.codecs.PositionsConsumer;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.codecs.Codec;
+
+// TODO: we now pulse entirely according to docFreq of the
+// term; it might be better to eg pulse by "net bytes used"
+// so that a term that has only 1 doc but zillions of
+// positions would not be inlined.  Though this is
+// presumably rare in practice...
+
+final class PulsingDocsWriter extends DocsConsumer {
+
+  final static String CODEC = "PulsedPostings";
+
+  // To add a new version, increment from the last one, and
+  // change VERSION_CURRENT to point to your new version:
+  final static int VERSION_START = 0;
+
+  final static int VERSION_CURRENT = VERSION_START;
+
+  IndexOutput termsOut;
+
+  boolean omitTF;
+  boolean storePayloads;
+
+  // Starts a new term
+  FieldInfo fieldInfo;
+
+  // nocommit
+  String desc;
+
+  static class Document {
+    int docID;
+    int termDocFreq;
+    int numPositions;
+    Position[] positions;
+    Document() {
+      positions = new Position[1];
+      positions[0] = new Position();
+    }
+
+    void reallocPositions(int minSize) {
+      final Position[] newArray = new Position[ArrayUtil.getNextSize(minSize)];
+      System.arraycopy(positions, 0, newArray, 0, positions.length);
+      for(int i=positions.length;i<newArray.length;i++)
+        newArray[i] = new Position();
+      positions = newArray;
+    }
+  }
+
+  final Document[] pendingDocs;
+  int pendingDocCount = 0;
+  Document currentDoc;
+  boolean pulsed;                                 // true once we've seen > maxPulsingDocFreq docs for the term
+
+  static class Position {
+    byte[] payload;
+    int pos;
+    int payloadLength;
+  }
+
+  // nocommit -- lazy init this?  ie, if every single term
+  // was pulsed then we never need to use this fallback?
+  // Fallback writer for non-pulsed terms:
+  final DocsConsumer wrappedDocsWriter;
+
+  /** If docFreq <= maxPulsingDocFreq, its postings are
+   *  inlined into terms dict */
+  PulsingDocsWriter(SegmentWriteState state, int maxPulsingDocFreq, DocsConsumer wrappedDocsWriter) throws IOException {
+    super();
+
+    pendingDocs = new Document[maxPulsingDocFreq];
+    for(int i=0;i<maxPulsingDocFreq;i++) {
+      pendingDocs[i] = new Document();
+    }
+
+    // We simply wrap another DocsConsumer, but only call on
+    // it when doc freq is higher than our cutoff
+    this.wrappedDocsWriter = wrappedDocsWriter;
+  }
+
+  public void start(IndexOutput termsOut) throws IOException {
+    this.termsOut = termsOut;
+    Codec.writeHeader(termsOut, CODEC, VERSION_CURRENT);
+    termsOut.writeVInt(pendingDocs.length);
+    wrappedDocsWriter.start(termsOut);
+  }
+
+  public void startTerm() {
+    assert pendingDocCount == 0;
+    pulsed = false;
+  }
+
+  // nocommit -- should we NOT reuse across fields?  would
+  // be cleaner
+
+  // Currently, this instance is re-used across fields, so
+  // our parent calls setField whenever the field changes
+  public void setField(FieldInfo fieldInfo) {
+    this.fieldInfo = fieldInfo;
+    omitTF = fieldInfo.omitTermFreqAndPositions;
+    storePayloads = fieldInfo.storePayloads;
+    wrappedDocsWriter.setField(fieldInfo);
+  }
+
+  /** Simply buffers up positions */
+  class PositionsWriter extends PositionsConsumer {
+    public void start(IndexOutput termsOut) {}
+    public void startTerm() {}
+    public void addPosition(int position, byte[] payload, int payloadOffset, int payloadLength) {
+      Position pos = currentDoc.positions[currentDoc.numPositions++];
+      pos.pos = position;
+      if (payload != null && payloadLength > 0) {
+        if (pos.payload == null || payloadLength > pos.payload.length) {
+          pos.payload = new byte[ArrayUtil.getNextSize(payloadLength)];
+        }
+        System.arraycopy(payload, payloadOffset, pos.payload, 0, payloadLength);
+        pos.payloadLength = payloadLength;
+      } else
+        pos.payloadLength = 0;
+    }
+    public void finishDoc() {
+      assert currentDoc.numPositions == currentDoc.termDocFreq;
+    }
+    public void finishTerm(boolean isIndexTerm) {}
+    public void close() {}
+  }
+
+  final PositionsWriter posWriter = new PositionsWriter();
+
+  public PositionsConsumer addDoc(int docID, int termDocFreq) throws IOException {
+
+    assert docID >= 0: "got docID=" + docID;
+        
+    if (Codec.DEBUG)
+      System.out.println("PW.addDoc: docID=" + docID + " pendingDocCount=" + pendingDocCount + " vs " + pendingDocs.length + " pulsed=" + pulsed);
+
+    if (!pulsed && pendingDocCount == pendingDocs.length) {
+      
+      // OK we just crossed the threshold, this term should
+      // now be written with our wrapped codec:
+      wrappedDocsWriter.startTerm();
+      
+      if (Codec.DEBUG)
+        System.out.println("  now flush buffer");
+
+      // Flush all buffered docs
+      for(int i=0;i<pendingDocCount;i++) {
+        final Document doc = pendingDocs[i];
+        if (Codec.DEBUG)
+          System.out.println("  docID=" + doc.docID);
+
+        PositionsConsumer posConsumer = wrappedDocsWriter.addDoc(doc.docID, doc.termDocFreq);
+        if (!omitTF && posConsumer != null) {
+          assert doc.termDocFreq == doc.numPositions;
+          for(int j=0;j<doc.termDocFreq;j++) {
+            final Position pos = doc.positions[j];
+            if (pos.payload != null && pos.payloadLength > 0) {
+              assert storePayloads;
+              posConsumer.addPosition(pos.pos, pos.payload, 0, pos.payloadLength);
+            } else
+              posConsumer.addPosition(pos.pos, null, 0, 0);
+          }
+          posConsumer.finishDoc();
+        }
+      }
+
+      pendingDocCount = 0;
+
+      pulsed = true;
+    }
+
+    if (pulsed) {
+      // We've already seen too many docs for this term --
+      // just forward to our fallback writer
+      return wrappedDocsWriter.addDoc(docID, termDocFreq);
+    } else {
+      currentDoc = pendingDocs[pendingDocCount++];
+      currentDoc.docID = docID;
+      // nocommit -- need not store in doc?  only used for alloc & assert
+      currentDoc.termDocFreq = termDocFreq;
+      if (termDocFreq > currentDoc.positions.length) {
+        currentDoc.reallocPositions(termDocFreq);
+      }
+      currentDoc.numPositions = 0;
+      if (omitTF) {
+        return null;
+      } else {
+        return posWriter;
+      }
+    }
+  }
+
+  boolean pendingIsIndexTerm;
+
+  int pulsedCount;
+  int nonPulsedCount;
+
+  /** Called when we are done adding docs to this term */
+  public void finishTerm(int docCount, boolean isIndexTerm) throws IOException {
+
+    if (Codec.DEBUG)
+      System.out.println("PW: finishTerm pendingDocCount=" + pendingDocCount);
+
+    pendingIsIndexTerm |= isIndexTerm;
+
+    if (pulsed) {
+      wrappedDocsWriter.finishTerm(docCount, pendingIsIndexTerm);
+      pendingIsIndexTerm = false;
+      pulsedCount++;
+    } else {
+      nonPulsedCount++;
+      // OK, there were few enough occurrences for this
+      // term, so we fully inline our postings data into
+      // terms dict:
+      int lastDocID = 0;
+      for(int i=0;i<pendingDocCount;i++) {
+        final Document doc = pendingDocs[i];
+        final int delta = doc.docID - lastDocID;
+        lastDocID = doc.docID;
+        if (omitTF) {
+          termsOut.writeVInt(delta);
+        } else {
+          assert doc.numPositions == doc.termDocFreq;
+          if (doc.numPositions == 1)
+            termsOut.writeVInt((delta<<1)|1);
+          else {
+            termsOut.writeVInt(delta<<1);
+            termsOut.writeVInt(doc.numPositions);
+          }
+
+          // TODO: we could do better in encoding
+          // payloadLength, eg, if it's always the same
+          // across all terms
+          int lastPosition = 0;
+          int lastPayloadLength = -1;
+
+          for(int j=0;j<doc.numPositions;j++) {
+            final Position pos = doc.positions[j];
+            final int delta2 = pos.pos - lastPosition;
+            lastPosition = pos.pos;
+            if (storePayloads) {
+              if (pos.payloadLength != lastPayloadLength) {
+                termsOut.writeVInt((delta2 << 1)|1);
+                termsOut.writeVInt(pos.payloadLength);
+                lastPayloadLength = pos.payloadLength;
+              } else
+                termsOut.writeVInt(delta2 << 1);
+              if (pos.payloadLength > 0)
+                termsOut.writeBytes(pos.payload, 0, pos.payloadLength);
+            } else
+              termsOut.writeVInt(delta2);
+          }
+        }
+      }
+    }
+
+    pendingDocCount = 0;
+  }
+
+  public void close() throws IOException {
+    wrappedDocsWriter.close();
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingDocsWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native
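
The position/payload encoding in finishTerm can be traced by hand. In this sketch (not part of the commit) the position delta's low bit flags "payload length changed", so a run of equal-length payloads costs one VInt per position; VInt writes are modeled as appends to a list:

import java.util.ArrayList;
import java.util.List;

public class PayloadPositionEncodeSketch {
  public static void main(String[] args) {
    int[] positions      = { 4, 10, 15 };
    int[] payloadLengths = { 2, 2, 3 };     // first two share a length

    List<Integer> out = new ArrayList<>();  // stand-in for VInt writes
    int lastPosition = 0, lastPayloadLength = -1;
    for (int j = 0; j < positions.length; j++) {
      int delta2 = positions[j] - lastPosition;
      lastPosition = positions[j];
      if (payloadLengths[j] != lastPayloadLength) {
        out.add((delta2 << 1) | 1);         // low bit set: new length follows
        out.add(payloadLengths[j]);
        lastPayloadLength = payloadLengths[j];
      } else {
        out.add(delta2 << 1);               // low bit clear: reuse last length
      }
      // (payload bytes themselves would be written here when length > 0)
    }
    System.out.println(out);  // [9, 2, 12, 11, 3]
  }
}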

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,64 @@
+package org.apache.lucene.index.codecs.sep;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.IndexInput;
+
+import java.io.IOException;
+
+/** Defines basic API for reading ints from an IndexInput.
+ *  IntBlockCodec interacts with this API. @see
+ *  IntBlockReader */
+public abstract class IntIndexInput {
+
+  public abstract Reader reader() throws IOException;
+
+  public abstract void close() throws IOException;
+
+  public abstract Index index() throws IOException;
+
+  public abstract static class Index {
+
+    // nocommit
+    public String desc;
+
+    public abstract void read(IndexInput indexIn, boolean absolute) throws IOException;
+
+    /** Seeks primary stream to the last read offset */
+    public abstract void seek(IntIndexInput.Reader stream) throws IOException;
+
+    public abstract void set(Index other);
+  }
+
+  public static final class BulkReadResult {
+    public int[] buffer;
+    public int offset;
+    public int len;
+  };
+
+  public abstract static class Reader {
+
+    /** Reads next single int */
+    public abstract int next() throws IOException;
+
+    /** Reads next chunk of ints */
+    public abstract BulkReadResult read(int[] buffer, int count) throws IOException;
+
+    public abstract String descFilePointer() throws IOException;
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java
------------------------------------------------------------------------------
    svn:eol-style = native
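
An array-backed sketch (not part of the commit) of the Reader contract above: next() returns a single int, while read() fills a caller-supplied buffer and reports what was actually delivered through a BulkReadResult-style struct. This mirrors the shape of IntIndexInput.Reader without the IndexInput plumbing:

public class IntReaderSketch {
  static class BulkReadResult { int[] buffer; int offset; int len; }

  private final int[] data;
  private int pos;

  IntReaderSketch(int[] data) { this.data = data; }

  int next() { return data[pos++]; }            // reads next single int

  BulkReadResult read(int[] buffer, int count) {
    BulkReadResult r = new BulkReadResult();
    r.len = Math.min(count, data.length - pos); // may deliver fewer than asked
    System.arraycopy(data, pos, buffer, 0, r.len);
    pos += r.len;
    r.buffer = buffer;
    r.offset = 0;
    return r;
  }

  public static void main(String[] args) {
    IntReaderSketch r = new IntReaderSketch(new int[]{7, 1, 4, 4, 2});
    System.out.println(r.next());               // 7
    BulkReadResult res = r.read(new int[8], 8); // delivers the remaining 4 ints
    System.out.println(res.len);                // 4
  }
}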

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,59 @@
+package org.apache.lucene.index.codecs.sep;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// TODO: we may want tighter integration w/ IndexOutput --
+// may give better perf:
+
+import org.apache.lucene.store.IndexOutput;
+
+import java.io.IOException;
+
+/** Defines basic API for writing ints to an IndexOutput.
+ *  IntBlockCodec interacts with this API. @see
+ *  IntBlockReader.
+ *
+ * <p>NOTE: block sizes could be variable */
+public abstract class IntIndexOutput {
+  /** Write an int to the primary file */
+  public abstract void write(int v) throws IOException;
+
+  public abstract static class Index {
+
+    // nocommit
+    public String desc;
+
+    /** Internally records the current location */
+    public abstract void mark() throws IOException;
+
+    /** Copies index from other */
+    public abstract void set(Index other) throws IOException;
+
+    /** Writes "location" of current output pointer of primary
+     * output to different output (out) */
+    public abstract void write(IndexOutput indexOut, boolean absolute) throws IOException;
+  }
+
+  /** If you are indexing the primary output file, call
+   *  this and interact with the returned Index. */
+  public abstract Index index() throws IOException;
+
+  public abstract void close() throws IOException;
+
+  public abstract String descFilePointer() throws IOException;
+}
\ No newline at end of file

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java
------------------------------------------------------------------------------
    svn:eol-style = native
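
A sketch (not part of the commit) of the absolute-vs-delta convention implied by Index.write(indexOut, absolute) above: an index entry records the primary stream's position either as a full pointer or as a delta from the previous entry, so most entries encode as short VLongs. The long values below stand in for file pointers, and VLong writes are modeled as list appends:

import java.util.ArrayList;
import java.util.List;

public class PointerIndexSketch {
  public static void main(String[] args) {
    long[] markedPointers = { 0, 120, 250, 400 };  // positions saved by mark()
    List<Long> out = new ArrayList<>();
    long last = 0;
    for (int i = 0; i < markedPointers.length; i++) {
      boolean absolute = (i == 0);          // e.g. the first entry of a block
      if (absolute) {
        out.add(markedPointers[i]);         // full pointer
      } else {
        out.add(markedPointers[i] - last);  // small delta -> short VLong
      }
      last = markedPointers[i];
    }
    System.out.println(out);  // [0, 120, 130, 150]
  }
}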

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntStreamFactory.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntStreamFactory.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntStreamFactory.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntStreamFactory.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,32 @@
+package org.apache.lucene.index.codecs.sep;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.BufferedIndexInput;
+
+import java.io.IOException;
+
+public abstract class IntStreamFactory {
+  public IntIndexInput openInput(Directory dir, String fileName) throws IOException {
+    return openInput(dir, fileName, BufferedIndexInput.BUFFER_SIZE);
+  }
+
+  public abstract IntIndexInput openInput(Directory dir, String fileName, int readBufferSize) throws IOException;
+  public abstract IntIndexOutput createOutput(Directory dir, String fileName) throws IOException;
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntStreamFactory.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepCodec.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepCodec.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepCodec.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepCodec.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,138 @@
+package org.apache.lucene.index.codecs.sep;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.DocsConsumer;
+import org.apache.lucene.index.codecs.DocsProducer;
+import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.codecs.standard.SimpleStandardTermsIndexReader;
+import org.apache.lucene.index.codecs.standard.SimpleStandardTermsIndexWriter;
+import org.apache.lucene.index.codecs.standard.StandardTermsDictReader;
+import org.apache.lucene.index.codecs.standard.StandardTermsDictWriter;
+import org.apache.lucene.index.codecs.standard.StandardTermsIndexReader;
+import org.apache.lucene.index.codecs.standard.StandardTermsIndexWriter;
+import org.apache.lucene.store.Directory;
+
+public class SepCodec extends Codec {
+
+  public SepCodec() {
+    name = "Sep";
+  }
+
+  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+
+    DocsConsumer docsWriter = new SepDocsWriter(state, new SingleIntFactory());
+
+    boolean success = false;
+    StandardTermsIndexWriter indexWriter;
+    try {
+      indexWriter = new SimpleStandardTermsIndexWriter(state);
+      success = true;
+    } finally {
+      if (!success) {
+        docsWriter.close();
+      }
+    }
+
+    success = false;
+    try {
+      FieldsConsumer ret = new StandardTermsDictWriter(indexWriter, state, docsWriter);
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        try {
+          docsWriter.close();
+        } finally {
+          indexWriter.close();
+        }
+      }
+    }
+  }
+
+  final static String DOC_EXTENSION = "doc";
+  final static String SKIP_EXTENSION = "skp";
+  final static String FREQ_EXTENSION = "frq";
+  final static String POS_EXTENSION = "pos";
+  final static String PAYLOAD_EXTENSION = "pyl";
+
+  public FieldsProducer fieldsProducer(Directory dir, FieldInfos fieldInfos, SegmentInfo si, int readBufferSize, int indexDivisor) throws IOException {
+
+    DocsProducer docsReader = new SepDocsReader(dir, si, readBufferSize, new SingleIntFactory());
+
+    StandardTermsIndexReader indexReader;
+    boolean success = false;
+    try {
+      indexReader = new SimpleStandardTermsIndexReader(dir,
+                                                       fieldInfos,
+                                                       si.name,
+                                                       indexDivisor);
+      success = true;
+    } finally {
+      if (!success) {
+        docsReader.close();
+      }
+    }
+
+    success = false;
+    try {
+      FieldsProducer ret = new StandardTermsDictReader(indexReader,
+                                                       dir, fieldInfos, si.name,
+                                                       docsReader,
+                                                       readBufferSize);
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        try {
+          docsReader.close();
+        } finally {
+          indexReader.close();
+        }
+      }
+    }
+  }
+
+  public void files(Directory dir, SegmentInfo segmentInfo, Collection files) {
+    SepDocsReader.files(segmentInfo, files);
+    StandardTermsDictReader.files(segmentInfo, files);
+    SimpleStandardTermsIndexReader.files(segmentInfo, files);
+  }
+
+  public void getExtensions(Collection extensions) {
+    getSepExtensions(extensions);
+  }
+
+  public static void getSepExtensions(Collection extensions) {
+    extensions.add(DOC_EXTENSION);
+    extensions.add(FREQ_EXTENSION);
+    extensions.add(SKIP_EXTENSION);
+    extensions.add(POS_EXTENSION);
+    extensions.add(PAYLOAD_EXTENSION);
+    StandardTermsDictReader.getExtensions(extensions);
+    SimpleStandardTermsIndexReader.getIndexExtensions(extensions);
+  }
+}
\ No newline at end of file

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepCodec.java
------------------------------------------------------------------------------
    svn:eol-style = native
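
For reference, the per-segment file names the extensions above translate to, following the segmentName + "." + extension convention visible elsewhere in this commit (segment name "_0" is just an example):

public class SepFilesSketch {
  public static void main(String[] args) {
    String segment = "_0";
    String[] extensions = { "doc", "frq", "skp", "pos", "pyl" };
    for (String ext : extensions) {
      // prints _0.doc, _0.frq, _0.skp, _0.pos, _0.pyl
      System.out.println(segment + "." + ext);
    }
  }
}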

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepDocsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepDocsReader.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepDocsReader.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepDocsReader.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,550 @@
+package org.apache.lucene.index.codecs.sep;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.index.codecs.DocsProducer;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PositionsEnum;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.codecs.Codec;
+
+/** Concrete class that reads the current doc/freq/skip
+ *  postings format */
+
+// nocommit -- should we switch "hasProx" higher up?  and
+// create two separate docs readers, one that also reads
+// prox and one that doesn't?
+
+public class SepDocsReader extends DocsProducer {
+
+  final IntIndexInput freqIn;
+  final IntIndexInput docIn;
+
+  final IndexInput skipIn;
+
+  IndexInput termsIn;
+
+  private final SepPositionsReader posReader;
+
+  int skipInterval;
+  int maxSkipLevels;
+
+  public SepDocsReader(Directory dir, SegmentInfo segmentInfo, int readBufferSize, IntStreamFactory intFactory) throws IOException {
+
+    boolean success = false;
+    try {
+
+      // nocommit -- freqIn is null if omitTF?
+      final String frqFileName = IndexFileNames.segmentFileName(segmentInfo.name, SepCodec.FREQ_EXTENSION);
+      freqIn = intFactory.openInput(dir, frqFileName);
+
+      final String docFileName = IndexFileNames.segmentFileName(segmentInfo.name, SepCodec.DOC_EXTENSION);
+      docIn = intFactory.openInput(dir, docFileName);
+
+      skipIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, SepCodec.SKIP_EXTENSION), readBufferSize);
+      if (segmentInfo.getHasProx()) {
+        posReader = new SepPositionsReader(dir, segmentInfo, readBufferSize, intFactory);
+      } else {
+        posReader = null;
+      }
+      success = true;
+    } finally {
+      if (!success) {
+        close();
+      }
+    }
+  }
+
+  public static void files(SegmentInfo segmentInfo, Collection files) {
+    files.add(IndexFileNames.segmentFileName(segmentInfo.name, SepCodec.FREQ_EXTENSION));
+    files.add(IndexFileNames.segmentFileName(segmentInfo.name, SepCodec.DOC_EXTENSION));
+    files.add(IndexFileNames.segmentFileName(segmentInfo.name, SepCodec.SKIP_EXTENSION));
+    SepPositionsReader.files(segmentInfo, files);
+  }
+
+  public void start(IndexInput termsIn) throws IOException {
+    this.termsIn = termsIn;
+
+    // Make sure we are reading the format the matching writer wrote
+    Codec.checkHeader(termsIn, SepDocsWriter.CODEC, SepDocsWriter.VERSION_START);
+
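+    // Both values are written once, up front, by the
+    // matching writer's start() and apply to every term in
+    // the segment: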
+    skipInterval = termsIn.readInt();
+    maxSkipLevels = termsIn.readInt();
+    if (posReader != null) {
+      posReader.start(termsIn);
+    }
+  }
+
+  public Reader reader(FieldInfo fieldInfo, IndexInput termsIn) throws IOException {
+
+    final SepPositionsReader.TermsDictReader posReader2;
+    if (posReader != null && !fieldInfo.omitTermFreqAndPositions) {
+      posReader2 = (SepPositionsReader.TermsDictReader) posReader.reader(fieldInfo, termsIn);
+    } else {
+      posReader2 = null;
+    }
+
+    return new TermsDictReader(fieldInfo, posReader2, termsIn);
+  }
+
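+  // Nested try/finally guarantees each stream gets a close
+  // attempt even if an earlier close throws: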
+  public void close() throws IOException {
+    try {
+      if (freqIn != null)
+        freqIn.close();
+    } finally {
+      try {
+        if (docIn != null)
+          docIn.close();
+      } finally {
+        try {
+          if (skipIn != null)
+            skipIn.close();
+        } finally {
+          if (posReader != null)
+            posReader.close();
+        }
+      }
+    }
+  }
+
+  class TermsDictReader extends Reader {
+
+    final IndexInput termsIn;
+    final FieldInfo fieldInfo;
+    final IntIndexInput.Reader freqIn;
+    final IntIndexInput.Index freqIndex;
+    final IntIndexInput.Reader docIn;
+    final IntIndexInput.Index docIndex;
+    final private boolean omitTF;
+
+    long skipOffset;
+    int docFreq;
+
+    // TODO: abstraction violation (we are storing this with
+    // the concrete impl. as the type, not the abstract base
+    // class)
+    final SepPositionsReader.TermsDictReader posReader;
+    private SegmentDocsEnum docs;
+
+    TermsDictReader(FieldInfo fieldInfo, SepPositionsReader.TermsDictReader posReader, IndexInput termsIn) throws IOException {
+      this.termsIn = termsIn;                     // not cloned
+      this.fieldInfo = fieldInfo;
+      this.posReader = posReader;
+      this.docIn = SepDocsReader.this.docIn.reader();
+      docIndex = SepDocsReader.this.docIn.index();
+      omitTF = fieldInfo.omitTermFreqAndPositions;
+      if (!omitTF) {
+        this.freqIn = SepDocsReader.this.freqIn.reader();
+        freqIndex = SepDocsReader.this.freqIn.index();
+      } else {
+        this.freqIn = null;
+        freqIndex = null;
+        docFreq = 1;
+      }
+    }
+
+    public void readTerm(int docFreq, boolean isIndexTerm) throws IOException {
+
+      this.docFreq = docFreq;
+      if (Codec.DEBUG) {
+        System.out.println("  dr.readTerm termsFP=" + termsIn.getFilePointer() + " df=" + docFreq + " isIndex=" + isIndexTerm);
+        System.out.println("    start freqFP=" + freqIndex + " docFP=" + docIndex + " skipFP=" + skipOffset);
+      }
+
+      if (!omitTF) {
+        freqIndex.read(termsIn, isIndexTerm);
+      }
+
+      docIndex.read(termsIn, isIndexTerm);
+
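+      // Skip pointer coding mirrors SepDocsWriter.finishTerm:
+      // an absolute vlong at index terms, a delta otherwise,
+      // and omitted entirely for short postings (docFreq <
+      // skipInterval) that carry no skip data.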
+      if (isIndexTerm) {
+        skipOffset = termsIn.readVLong();
+      } else {
+        if (docFreq >= skipInterval) {
+          skipOffset += termsIn.readVLong();
+        }
+      }
+
+      if (Codec.DEBUG) {
+        System.out.println("    freqFP=" + freqIndex + " docFP=" + docIndex + " skipFP=" + skipOffset);
+      }
+
+      if (posReader != null) {
+        posReader.readTerm(docFreq, isIndexTerm);
+      }
+    }
+
+    public DocsEnum docs(Bits skipDocs) throws IOException {
+
+      if (docs == null) {
+        // Lazy init
+        docs = new SegmentDocsEnum();
+      }
+
+      docs.init(skipDocs);
+
+      return docs;
+    }
+
+    class SegmentDocsEnum extends DocsEnum {
+      int docFreq;
+      int doc;
+      int count;
+      int freq;
+      long freqStart;
+
+      // nocommit -- should we do omitTF with 2 different enum classes?
+      final boolean omitTF;
+      private Bits skipDocs;
+
+      // nocommit -- should we do hasProx with 2 different enum classes?
+
+      boolean skipped;
+      SepSkipListReader skipper;
+
+      // TODO: abstraction violation: we are storing the
+      // concrete impl, not the abstract base class
+      SepPositionsReader.TermsDictReader.SegmentPositionsEnum positions;
+
+      SegmentDocsEnum() {
+        if (Codec.DEBUG) {
+          System.out.println("new docs enum");
+        }
+        omitTF = fieldInfo.omitTermFreqAndPositions;
+        if (omitTF) {
+          freq = 1;
+        }
+      }
+
+      void init(Bits skipDocs) throws IOException {
+        if (Codec.DEBUG) {
+          System.out.println("[" + desc + "] dr.init freqIn seek " + freqIndex + " this=" + this + " (in=" + freqIn + "; this=" + this + ")");
+        }
+        this.skipDocs = skipDocs;
+
+        // nocommit: can't we only do this if consumer
+        // skipped consuming the previous docs?
+        docIndex.seek(docIn);
+
+        if (!omitTF) {
+          freqIndex.seek(freqIn);
+        }
+        this.docFreq = TermsDictReader.this.docFreq;
+        count = 0;
+        doc = 0;
+        skipped = false;
+        proxSkipFreq = 0;
+
+        // maybe not necessary?
+        proxSkipPayloadLength = -1;
+
+        // TODO: abstraction violation
+        if (posReader != null) {
+          //posIndex = posReader.posIndex;
+          posIndex = posReader.getPosIn().index();
+          posIndex.set(posReader.posIndex);
+          payloadOffset = posReader.payloadOffset;
+        }
+      }
+
+      public int next() throws IOException {
+
+        if (Codec.DEBUG) {
+          if (!omitTF) {
+            System.out.println("sdr [" + desc + "] next count=" + count + " vs df=" + docFreq + " freqFP=" + freqIn.descFilePointer() + " docFP=" + docIn.descFilePointer() + " skipDocs?=" + (skipDocs != null) );
+          } else {
+            System.out.println("sdr [" + desc + "] next count=" + count + " vs df=" + docFreq + " docFP=" + docIn.descFilePointer() + " skipDocs?=" + (skipDocs != null) );
+          }
+        }
+
+        while(true) {
+          if (count == docFreq) {
+            return NO_MORE_DOCS;
+          }
+
+          count++;
+
+          // Decode next doc
+          doc += docIn.next();
+          
+          if (!omitTF) {
+            freq = freqIn.next();
+            if (positions != null) {
+              positions.seek(freq);
+            } else {
+              proxSkipFreq += freq;
+            }
+          }
+
+          if (Codec.DEBUG) {
+            System.out.println("  decode doc=" + doc + " freq=" + freq);
+          }
+
+          if (skipDocs == null || !skipDocs.get(doc)) {
+            break;
+          } else if (Codec.DEBUG) {
+            System.out.println("  doc=" + doc + " is skipped");
+          }
+        }
+
+        // nocommit
+        if (Codec.DEBUG) {
+          if (positions != null) {
+            positions.desc = desc + ":" + doc;
+          }
+          System.out.println("  return doc=" + doc);
+        }
+        return doc;
+      }
+
+      public int read(int[] docs, int[] freqs) throws IOException {
+        // nocommit -- switch to bulk read api in IntIndexInput
+        int i = 0;
+        final int length = docs.length;
+        while (i < length && count < docFreq) {
+          count++;
+          // manually inlined call to next() for speed
+          doc += docIn.next();
+          if (!omitTF) {
+            freq = freqIn.next();
+            if (positions != null) {
+              positions.seek(freq);
+            } else {
+              proxSkipFreq += freq;
+            }
+          }
+
+          if (skipDocs == null || !skipDocs.get(doc)) {
+            docs[i] = doc;
+            freqs[i] = freq;
+            i++;
+          }
+        }
+
+        return i;
+      }
+
+      public int freq() {
+        return freq;
+      }
+
+      // Holds pending seek data for positions:
+      IntIndexInput.Index posIndex;
+      long payloadOffset;
+      int proxSkipPayloadLength;
+
+      // If we step through docs w/o getting positions for
+      // them, we accumulate how many freqs we've skipped
+      // here.  Then, when positions() is called, we skip
+      // this many positions to catch up:
+      int proxSkipFreq;
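+      // Eg reading two docs with freqs 2 and 3 without
+      // requesting positions leaves proxSkipFreq=5; the
+      // first positions() call then skips those 5 positions
+      // to catch up.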
+
+      PositionsEnum fakePositions;
+
+      public PositionsEnum positions() throws IOException {
+        
+        if (Codec.DEBUG) {
+          System.out.println("sep.positions pos=" + positions + " freq=" + freq);
+        }
+
+        if (positions == null) {
+
+          // First time positions is requested from this DocsEnum
+
+          // Lazy init
+          if (posReader == null) {
+
+            // nocommit -- should we return null?
+
+            // TermFreq was omitted from this field during
+            // indexing, which means we pretend termFreq is
+            // always 1 with that 1 occurrence having
+            // position 0
+            if (fakePositions == null) {
+              fakePositions = new FakePositionsEnum();
+            }
+            if (Codec.DEBUG) {
+              System.out.println("  return fake");
+            }
+            return fakePositions;
+          } else {
+
+            // nocommit: abstraction violation
+            positions = (SepPositionsReader.TermsDictReader.SegmentPositionsEnum) posReader.positions();
+            if (Codec.DEBUG) {
+              System.out.println("pos skip posIndex=" + posIndex + " payloadlen=" + proxSkipPayloadLength + " skipPosCount= " + proxSkipFreq);
+            }
+            positions.seek(posIndex, payloadOffset, proxSkipPayloadLength);
+
+            // TODO: technically, if this positions is deep
+            // into the DocsEnum iteration, it'd pay to use
+            // the skipper to catch up, instead of linear
+            // scan:
+            positions.seek(proxSkipFreq);
+            proxSkipFreq = 0;
+          }
+        }
+
+        if (Codec.DEBUG) {
+          positions.desc = desc + ":" + doc;
+        }
+
+        positions.catchUp(freq);
+
+        return positions;
+      }
+
+      public int advance(int target) throws IOException {
+
+        // TODO: jump right to next() if target is < X away
+        // from where we are now?
+
+        if (Codec.DEBUG) {
+          System.out.println("sdr [" + desc + "]: advance target=" + target);
+        }
+
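+        // Two phases: if the posting has skip data, first let
+        // the multi-level skip list jump close to target
+        // (seeking the doc/freq/pos streams to the skip
+        // point); then fall through to a linear next() scan
+        // for the remaining docs.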
+        if (docFreq >= skipInterval) {
+
+          // There are enough docs in the posting to have
+          // skip data
+          if (skipper == null) {
+            // Lazy init
+            if (Codec.DEBUG) {
+              System.out.println("  create skipper");
+            }
+            skipper = new SepSkipListReader((IndexInput) skipIn.clone(),
+                                            omitTF ? null : SepDocsReader.this.freqIn,
+                                            SepDocsReader.this.docIn,
+                                            posReader == null ? null : posReader.getPosIn(),
+                                            maxSkipLevels, skipInterval);
+          }
+
+          if (!skipped) {
+
+            // We haven't yet skipped for this posting,
+            // so now we init the skipper
+
+            // TODO: this is abstraction violation; instead,
+            // skipper should interact with this as a
+            // private consumer
+            skipper.init(skipOffset,
+                         docIndex,
+                         freqIndex,
+                         posReader != null ? posReader.posIndex : null,
+                         payloadOffset,
+                         docFreq,
+                         fieldInfo.storePayloads);
+
+            if (Codec.DEBUG) {
+              System.out.println("    init skipper: base skipFP=" + skipOffset + " docFP=" + docIndex + " freqFP=" + freqIndex + " proxFP=" +
+                                 (posReader != null ? posReader.posIndex : null) + " payloadFP=" + payloadOffset);
+            }
+
+            skipped = true;
+          }
+
+          final int newCount = skipper.skipTo(target); 
+
+          if (newCount > count) {
+
+            if (Codec.DEBUG) {
+              System.out.println("sdr [" + desc + "]: skipper moved to newCount=" + newCount +
+                                 " docFP=" + skipper.getDocIndex() +
+                                 " freqFP=" + skipper.getFreqIndex() +
+                                 " posFP=" + skipper.getPosIndex() +
+                                 " payloadFP=" + skipper.getPayloadPointer() +
+                                 " doc=" + skipper.getDoc());
+            }
+            
+            // Skipper did move
+            if (!omitTF) {
+              skipper.getFreqIndex().seek(freqIn);
+            }
+            skipper.getDocIndex().seek(docIn);
+            count = newCount;
+            doc = skipper.getDoc();
+
+            // TODO: abstraction violation; this should be a
+            // private interaction b/w skipper & posReader
+            if (positions != null) {
+              positions.seek(skipper.getPosIndex(),
+                             skipper.getPayloadPointer(),
+                             skipper.getPayloadLength());
+            } else {
+              if (posIndex != null) {
+                posIndex.set(skipper.getPosIndex());
+              }
+              payloadOffset = skipper.getPayloadPointer();
+              proxSkipPayloadLength = skipper.getPayloadLength();
+              proxSkipFreq = 0;
+            }
+          } else if (Codec.DEBUG) {
+            System.out.println("  no skipping to be done");
+          }
+        }
+        
+        // Now, linear scan for the rest:
+        do {
+          if (next() == NO_MORE_DOCS) {
+            return NO_MORE_DOCS;
+          }
+        } while (target > doc);
+
+        return doc;
+      }
+    }
+
+    @Override
+    public State captureState(State reusableState) {
+      // TODO Auto-generated method stub
+      return null;
+    }
+
+    @Override
+    public void setState(State state) throws IOException {
+      // TODO Auto-generated method stub
+      
+    }
+  }
+}
+
+/** Returned when someone asks for a positions() enum on a
+ *  field with omitTF true: every position is 0 and there
+ *  are no payloads. */
+class FakePositionsEnum extends PositionsEnum {
+  public int next() {
+    return 0;
+  }
+  public int getPayloadLength() {
+    return 0;
+  }
+  public boolean hasPayload() {
+    return false;
+  }
+  public byte[] getPayload(byte[] data, int offset) {
+    return null;
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepDocsReader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepDocsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepDocsWriter.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepDocsWriter.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepDocsWriter.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,246 @@
+package org.apache.lucene.index.codecs.sep;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.index.codecs.DocsConsumer;
+import org.apache.lucene.index.codecs.PositionsConsumer;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.codecs.Codec;
+
+/** Writes frq to .frq, docs to .doc, pos to .pos, payloads
+ *  to .pyl, skip data to .skp */
+
+public final class SepDocsWriter extends DocsConsumer {
+  final static String CODEC = "SepDocFreqSkip";
+
+  // Increment version to change it:
+  final static int VERSION_START = 0;
+  final static int VERSION_CURRENT = VERSION_START;
+
+  final IntIndexOutput freqOut;
+  final IntIndexOutput.Index freqIndex;
+
+  final IntIndexOutput docOut;
+  final IntIndexOutput.Index docIndex;
+
+  final IndexOutput skipOut;
+  IndexOutput termsOut;
+
+  final SepPositionsWriter posWriter;
+  final SepSkipListWriter skipListWriter;
+  final int skipInterval;
+  final int maxSkipLevels;
+  final int totalNumDocs;
+
+  boolean storePayloads;
+  boolean omitTF;
+
+  // Skip-data file pointer of the last term that recorded
+  // one; finishTerm delta-codes non-index terms' skip
+  // pointers against it
+  long lastSkipStart;
+
+  FieldInfo fieldInfo;
+
+  public SepDocsWriter(SegmentWriteState state, IntStreamFactory factory) throws IOException {
+    super();
+
+    final String frqFileName = IndexFileNames.segmentFileName(state.segmentName, SepCodec.FREQ_EXTENSION);
+    state.flushedFiles.add(frqFileName);
+    freqOut = factory.createOutput(state.directory, frqFileName);
+    freqIndex = freqOut.index();
+
+    final String docFileName = IndexFileNames.segmentFileName(state.segmentName, SepCodec.DOC_EXTENSION);
+    state.flushedFiles.add(docFileName);
+    docOut = factory.createOutput(state.directory, docFileName);
+    docIndex = docOut.index();
+
+    final String skipFileName = IndexFileNames.segmentFileName(state.segmentName, SepCodec.SKIP_EXTENSION);
+    state.flushedFiles.add(skipFileName);
+    skipOut = state.directory.createOutput(skipFileName);
+
+    if (Codec.DEBUG) {
+      System.out.println("dw.init: create frq=" + frqFileName + " doc=" + docFileName + " skip=" + skipFileName);
+    }
+
+    totalNumDocs = state.numDocs;
+
+    // nocommit -- abstraction violation
+    skipListWriter = new SepSkipListWriter(state.skipInterval,
+                                           state.maxSkipLevels,
+                                           state.numDocs,
+                                           freqOut, docOut,
+                                           null, null);
+
+    skipInterval = state.skipInterval;
+    maxSkipLevels = state.maxSkipLevels;
+
+    posWriter = new SepPositionsWriter(state, this, factory);
+  }
+
+  public void start(IndexOutput termsOut) throws IOException {
+    this.termsOut = termsOut;
+    Codec.writeHeader(termsOut, CODEC, VERSION_CURRENT);
+    // nocommit -- just ask skipper to "start" here
+    termsOut.writeInt(skipInterval);                // write skipInterval
+    termsOut.writeInt(maxSkipLevels);               // write maxSkipLevels
+    posWriter.start(termsOut);
+  }
+
+  public void startTerm() throws IOException {
+    docIndex.mark();
+    if (!omitTF) {
+      freqIndex.mark();
+      posWriter.startTerm();
+    }
+    skipListWriter.resetSkip(docIndex, freqIndex, posWriter.posIndex);
+  }
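+
+  // The marks taken in startTerm are what finishTerm later
+  // writes into the terms dict (absolute at index terms,
+  // delta-coded otherwise), letting a reader seek straight
+  // to this term's postings.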
+
+  // nocommit -- should we NOT reuse across fields?  would
+  // be cleaner
+
+  // Currently, this instance is re-used across fields, so
+  // our parent calls setField whenever the field changes
+  public void setField(FieldInfo fieldInfo) {
+    this.fieldInfo = fieldInfo;
+    omitTF = fieldInfo.omitTermFreqAndPositions;
+    skipListWriter.setOmitTF(omitTF);
+    storePayloads = fieldInfo.storePayloads;
+    posWriter.setField(fieldInfo);
+  }
+
+  int lastDocID;
+  int df;
+
+  int count;
+
+  /** Adds a new doc in this term.  If this returns null
+   *  then we just skip consuming positions/payloads. */
+  public PositionsConsumer addDoc(int docID, int termDocFreq) throws IOException {
+
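+    // Docs arrive in increasing order within a term, so only
+    // the delta is written; eg docIDs 5, 9, 12 are stored as
+    // 5, 4, 3.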
+    final int delta = docID - lastDocID;
+
+    if (Codec.DEBUG) {
+      System.out.println("  dw.addDoc [" + desc + "] count=" + (count++) + " docID=" + docID + " lastDocID=" + lastDocID + " delta=" + delta + " omitTF=" + omitTF + " freq=" + termDocFreq);
+    }
+
+    if (docID < 0 || (df > 0 && delta <= 0)) {
+      throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + ")");
+    }
+
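+    // Every skipInterval'th doc buffers a skip entry
+    // recording the previous docID, the current file pointer
+    // of each stream and the last payload length, so a
+    // reader can later jump here without decoding the docs
+    // in between.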
+    if ((++df % skipInterval) == 0) {
+      // TODO: abstraction violation
+      // nocommit -- awkward we have to make these two
+      // separate calls to skipper
+      skipListWriter.setSkipData(lastDocID, storePayloads, posWriter.lastPayloadLength);
+      skipListWriter.bufferSkip(df);
+
+      if (Codec.DEBUG) {
+        System.out.println("    bufferSkip lastDocID=" + lastDocID +
+                           " df=" + df +
+                           " docFP=" + docOut.descFilePointer() + 
+                           " freqFP=" + freqOut.descFilePointer() + 
+                           " posFP=" + posWriter.posOut.descFilePointer() + 
+                           " payloadFP=" + skipListWriter.payloadOutput.getFilePointer() + 
+                           " payloadLen=" + posWriter.lastPayloadLength);
+      }
+    }
+
+    lastDocID = docID;
+    docOut.write(delta);
+    if (!omitTF) {
+      freqOut.write(termDocFreq);
+    }
+
+    // nocommit
+    if (Codec.DEBUG) {
+      ((SepPositionsWriter) posWriter).desc = desc + ":" + docID;
+    }
+
+    if (omitTF) {
+      return null;
+    } else {
+      return posWriter;
+    }
+  }
+
+  /** Called when we are done adding docs to this term */
+  public void finishTerm(int docCount, boolean isIndexTerm) throws IOException {
+
+    long skipPos = skipOut.getFilePointer();
+
+    // nocommit -- wasteful we are counting this in two places?
+    assert docCount == df;
+    if (Codec.DEBUG) {
+      System.out.println("dw.finishTerm termsFP=" + termsOut.getFilePointer() + " df=" + df + " skipPos=" + skipPos);
+    }
+
+    if (!omitTF) {
+      freqIndex.write(termsOut, isIndexTerm);
+    }
+    docIndex.write(termsOut, isIndexTerm);
+
+    if (df >= skipInterval) {
+      if (Codec.DEBUG) {
+        System.out.println("  writeSkip skipPos=" + skipPos + " lastSkipPos=" + lastSkipStart);
+      }
+      
+      skipListWriter.writeSkip(skipOut);
+    }
+
+    if (isIndexTerm) {
+      termsOut.writeVLong(skipPos);
+      lastSkipStart = skipPos;
+    } else if (df >= skipInterval) {
+      termsOut.writeVLong(skipPos-lastSkipStart);
+      lastSkipStart = skipPos;
+    }
+
+    if (!omitTF) {
+      posWriter.finishTerm(isIndexTerm);
+    }
+
+    lastDocID = 0;
+    df = 0;
+
+    // nocommit
+    count = 0;
+  }
+
+  public void close() throws IOException {
+    if (Codec.DEBUG)
+      System.out.println("dw.close skipFP=" + skipOut.getFilePointer());
+    try {
+      freqOut.close();
+    } finally {
+      try {
+        docOut.close();
+      } finally {
+        try {
+          skipOut.close();
+        } finally {
+          posWriter.close();
+        }
+      }
+    }
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepDocsWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native


