lucene-java-commits mailing list archives

From mikemcc...@apache.org
Subject svn commit: r824918 [8/11] - in /lucene/java/branches/flex_1458: contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/ contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/ contrib/benchmark/src/test/org/apache/lucene/benc...
Date Tue, 13 Oct 2009 20:44:59 GMT
Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,135 @@
+package org.apache.lucene.index.codecs.standard;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.DocsConsumer;
+import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.store.Directory;
+
+/** Current index file format */
+public class StandardCodec extends Codec {
+
+  public StandardCodec() {
+    name = "Standard";
+  }
+
+  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+    DocsConsumer docs = new StandardDocsWriter(state);
+
+    StandardTermsIndexWriter indexWriter;
+    boolean success = false;
+    try {
+      indexWriter = new SimpleStandardTermsIndexWriter(state);
+      success = true;
+    } finally {
+      if (!success) {
+        docs.close();
+      }
+    }
+
+    success = false;
+    try {
+      FieldsConsumer ret = new StandardTermsDictWriter(indexWriter, state, docs);
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        try {
+          docs.close();
+        } finally {
+          indexWriter.close();
+        }
+      }
+    }
+  }
+
+  public FieldsProducer fieldsProducer(Directory dir, FieldInfos fieldInfos, SegmentInfo si, int readBufferSize, int indexDivisor) throws IOException {
+    StandardDocsReader docs = new StandardDocsReader(dir, si, readBufferSize);
+    StandardTermsIndexReader indexReader;
+
+    // nocommit -- not clean that every codec must deal w/
+    // this... dup'd code
+    boolean success = false;
+    try {
+      indexReader = new SimpleStandardTermsIndexReader(dir,
+                                                       fieldInfos,
+                                                       si.name,
+                                                       indexDivisor);
+      success = true;
+    } finally {
+      if (!success) {
+        docs.close();
+      }
+    }
+
+    success = false;
+    try {
+      FieldsProducer ret = new StandardTermsDictReader(indexReader,
+                                                       dir, fieldInfos, si.name,
+                                                       docs,
+                                                       readBufferSize);
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        try {
+          docs.close();
+        } finally {
+          indexReader.close();
+        }
+      }
+    }
+  }
+
+  /** Extension of freq postings file */
+  static final String FREQ_EXTENSION = "frq";
+
+  /** Extension of prox postings file */
+  static final String PROX_EXTENSION = "prx";
+
+  /** Extension of terms file */
+  static final String TERMS_EXTENSION = "tis";
+
+  /** Extension of terms index file */
+  static final String TERMS_INDEX_EXTENSION = "tii";
+
+  public void files(Directory dir, SegmentInfo segmentInfo, Collection files) {
+    StandardDocsReader.files(segmentInfo, files);
+    StandardTermsDictReader.files(segmentInfo, files);
+    SimpleStandardTermsIndexReader.files(segmentInfo, files);
+  }
+
+  public void getExtensions(Collection extensions) {
+    getStandardExtensions(extensions);
+  }
+
+  public static void getStandardExtensions(Collection extensions) {
+    extensions.add(FREQ_EXTENSION);
+    extensions.add(PROX_EXTENSION);
+    StandardTermsDictReader.getExtensions(extensions);
+    SimpleStandardTermsIndexReader.getIndexExtensions(extensions);
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java
------------------------------------------------------------------------------
    svn:eol-style = native
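
Both factory methods above use the same "success flag" cleanup idiom: a
resource opened in an earlier step is closed in a finally block only when a
later step throws, so on the happy path ownership passes to the returned
object. A minimal standalone sketch of the idiom (the Resource/open names
are hypothetical, not part of this commit):

    import java.io.Closeable;
    import java.io.IOException;

    class SuccessFlagSketch {
      static Closeable openSecondGuarded(Closeable first) throws IOException {
        boolean success = false;
        try {
          Closeable second = open("second");   // may throw
          success = true;
          return second;                       // happy path: first stays open
        } finally {
          if (!success) {
            first.close();                     // failure path: release step one
          }
        }
      }

      // hypothetical stand-in for dir.openInput(...) and friends
      static Closeable open(String name) throws IOException {
        return new Closeable() {
          public void close() {}
        };
      }

      public static void main(String[] args) throws IOException {
        Closeable first = open("first");
        Closeable second = openSecondGuarded(first);
        second.close();
        first.close();
      }
    }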

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardDocsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardDocsReader.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardDocsReader.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardDocsReader.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,525 @@
+package org.apache.lucene.index.codecs.standard;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PositionsEnum;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.index.codecs.DocsProducer;
+
+/** Concrete class that reads the current doc/freq/skip
+ *  postings format */
+
+// nocommit -- should we switch "hasProx" higher up?  and
+// create two separate docs readers, one that also reads
+// prox and one that doesn't?
+
+public class StandardDocsReader extends DocsProducer {
+
+  final IndexInput freqIn;
+  IndexInput termsIn;
+
+  private final StandardPositionsReader posReader;
+
+  int skipInterval;
+  int maxSkipLevels;
+
+  public StandardDocsReader(Directory dir, SegmentInfo segmentInfo, int readBufferSize) throws IOException {
+    freqIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, StandardCodec.FREQ_EXTENSION), readBufferSize);
+
+    boolean success = false;
+    try {
+      if (segmentInfo.getHasProx()) {
+        posReader = new StandardPositionsReader(dir, segmentInfo, readBufferSize);
+      } else {
+        posReader = null;
+      }
+      // mxx
+      if (Codec.DEBUG) {
+        System.out.println(Thread.currentThread().getName() + ": sdr.init: hasProx=" + segmentInfo.getHasProx() + " posReader=" + posReader + " seg=" + segmentInfo.name + " docCount=" + segmentInfo.docCount);
+      }
+      success = true;
+    } finally {
+      if (!success) {
+        freqIn.close();
+      }
+    }
+  }
+
+  public static void files(SegmentInfo segmentInfo, Collection files) {
+    files.add(IndexFileNames.segmentFileName(segmentInfo.name, StandardCodec.FREQ_EXTENSION));
+    StandardPositionsReader.files(segmentInfo, files);
+  }
+
+  public void start(IndexInput termsIn) throws IOException {
+    this.termsIn = termsIn;
+
+    // Make sure this file was produced by the matching writer
+    Codec.checkHeader(termsIn, StandardDocsWriter.CODEC, StandardDocsWriter.VERSION_START);
+
+    skipInterval = termsIn.readInt();
+    maxSkipLevels = termsIn.readInt();
+    if (posReader != null)
+      posReader.start(termsIn);
+  }
+
+  public Reader reader(FieldInfo fieldInfo, IndexInput termsIn) {
+
+    final StandardPositionsReader.TermsDictReader posReader2;
+    if (posReader != null && !fieldInfo.omitTermFreqAndPositions) {
+      posReader2 = (StandardPositionsReader.TermsDictReader) posReader.reader(fieldInfo, termsIn);
+    } else {
+      posReader2 = null;
+    }
+
+    return new TermsDictReader(fieldInfo, posReader2, termsIn);
+  }
+
+  public void close() throws IOException {
+    try {
+      freqIn.close();
+    } finally {
+      if (posReader != null) {
+        posReader.close();
+      }
+    }
+  }
+
+  class TermsDictReader extends Reader {
+
+    final IndexInput termsIn;
+    final FieldInfo fieldInfo;
+    long freqOffset;
+    long skipOffset;
+    int docFreq;
+
+    // TODO: abstraction violation (we are storing this with
+    // the concrete impl. as the type, not the abstract base
+    // class)
+    final StandardPositionsReader.TermsDictReader posReader;
+    private SegmentDocsEnum docs;
+
+    TermsDictReader(FieldInfo fieldInfo, StandardPositionsReader.TermsDictReader posReader, IndexInput termsIn) {
+      this.termsIn = termsIn;                     // not cloned
+      this.fieldInfo = fieldInfo;
+      this.posReader = posReader;
+      if (Codec.DEBUG) {
+        System.out.println("sdr.tdr: init");
+      }
+    }
+
+    public void readTerm(int docFreq, boolean isIndexTerm) throws IOException {
+
+      this.docFreq = docFreq;
+      // mxx
+      if (Codec.DEBUG) {
+        System.out.println("  sdr.readTerm termsInPointer=" + termsIn.getFilePointer() + " df=" + docFreq + " isIndex?=" + isIndexTerm + " posReader=" + posReader);
+      }
+
+      if (isIndexTerm) {
+        freqOffset = termsIn.readVLong();
+      } else {
+        freqOffset += termsIn.readVLong();
+      }
+
+      // mxx
+      if (Codec.DEBUG) {
+        System.out.println("    freqOffset=" + freqOffset + " vs len=" + freqIn.length());
+      }
+
+      if (docFreq >= skipInterval) {
+        skipOffset = termsIn.readVLong();
+      } else {
+        skipOffset = 0;
+      }
+
+      if (posReader != null) {
+        posReader.readTerm(docFreq, isIndexTerm);
+      }
+    }
+    
+    public class TermDictsReaderState extends State {
+      long termsInPos;
+      long freqOffset;
+      long skipOffset;
+      long freqInPos;
+      int freq;
+      long proxPos;
+      public long proxOffset;
+    }
+    
+    @Override
+    public State captureState(State reusableState) {
+      TermDictsReaderState state;
+      if(reusableState == null) {
+        state = new TermDictsReaderState();
+      } else {
+        state = (TermDictsReaderState) reusableState;
+        state.proxPos = 0;
+        state.proxOffset = 0;
+      }
+      if(posReader != null) {
+        if(posReader.positions != null) {
+          state.proxPos = posReader.positions.proxIn.getFilePointer();
+        }
+        state.proxOffset = posReader.proxOffset;
+      }
+      state.termsInPos = termsIn.getFilePointer();
+      state.freqOffset = freqOffset;
+      state.freqInPos = freqIn.getFilePointer();
+      state.freq = docFreq;
+      state.skipOffset = skipOffset;
+      return state;
+    }
+
+    @Override
+    public void setState(State state) throws IOException {
+      TermDictsReaderState readerState = (TermDictsReaderState)state;
+      skipOffset = readerState.skipOffset;
+      termsIn.seek(readerState.termsInPos);
+      freqOffset = readerState.freqOffset;
+      freqIn.seek(readerState.freqInPos);
+      docFreq = readerState.freq;
+      
+      if(posReader != null) {
+        if(posReader.positions != null) {
+          posReader.positions.proxIn.seek(readerState.proxPos);
+        }
+        posReader.proxOffset = readerState.proxOffset;
+      }
+    }
+    
+    public boolean canCaptureState() {
+      return true;
+    }
+
+    public DocsEnum docs(Bits skipDocs) throws IOException {
+
+      if (docs == null) {
+        // Lazy init
+        docs = new SegmentDocsEnum();
+      }
+
+      docs.init(skipDocs);
+
+      return docs;
+    }
+
+    class SegmentDocsEnum extends DocsEnum {
+      int docFreq;
+      int doc;
+      int count;
+      int freq;
+      long skipStart;
+      long freqStart;
+      final IndexInput freqIn;
+      // nocommit -- should we do omitTF with 2 different enum classes?
+      final boolean omitTF;
+      private Bits skipDocs;
+
+      // nocommit -- should we do hasProx with 2 different enum classes?
+
+      boolean skipped;
+      DefaultSkipListReader skipper;
+
+      // TODO: abstraction violation: we are storing the
+      // concrete impl, not the abstract base class
+      StandardPositionsReader.TermsDictReader.SegmentPositionsEnum positions;
+
+      SegmentDocsEnum() {
+        if (Codec.DEBUG) {
+          System.out.println("new docs enum");
+        }
+        this.freqIn = (IndexInput) StandardDocsReader.this.freqIn.clone();
+        omitTF = fieldInfo.omitTermFreqAndPositions;
+        if (omitTF) {
+          freq = 1;
+        }
+      }
+
+      void init(Bits skipDocs) throws IOException {
+        if (Codec.DEBUG) {
+          System.out.println("[" + desc + "] dr.init freqIn seek " + freqOffset + " this=" + this + " (in=" + freqIn + "; this=" + this + ") docFreq=" + TermsDictReader.this.docFreq);
+        }
+        this.skipDocs = skipDocs;
+        freqIn.seek(freqOffset);
+        this.docFreq = TermsDictReader.this.docFreq;
+        count = 0;
+        doc = 0;
+        skipped = false;
+        skipStart = freqStart + skipOffset;
+        proxSkipFreq = 0;
+
+        // maybe not necessary?
+        proxSkipPayloadLength = -1;
+
+        // nocommit: abstraction violation
+        if (posReader != null) {
+          proxOffset = posReader.proxOffset;
+        }
+
+        if (positions != null) {
+          positions.payloadLength = -1;
+        }
+        //new Throwable().printStackTrace(System.out);
+      }
+
+      public int next() throws IOException {
+        if (Codec.DEBUG) {
+          System.out.println("sdr.next [" + desc + "] count=" + count + " vs df=" + docFreq + " freq pointer=" + freqIn.getFilePointer() + " (in=" + freqIn + "; this=" + this + ") + has skip docs=" + (skipDocs != null));
+        }
+
+        while(true) {
+          if (count == docFreq) {
+            return NO_MORE_DOCS;
+          }
+
+          count++;
+
+          // Decode next doc/freq pair
+          final int code = freqIn.readVInt();
+          if (Codec.DEBUG) {
+            System.out.println("  read code=" + code);
+          }
+          if (omitTF)
+            doc += code;
+          else {
+            doc += code >>> 1;              // shift off low bit
+            if ((code & 1) != 0)            // if low bit is set
+              freq = 1;                     // freq is one
+            else
+              freq = freqIn.readVInt();     // else read freq
+
+            if (positions != null)
+              positions.skip(freq);
+            else
+              proxSkipFreq += freq;
+          }
+
+          if (skipDocs == null || !skipDocs.get(doc)) {
+            break;
+          } else if (Codec.DEBUG) {
+            System.out.println("  doc=" + doc + " is skipped");
+          }
+        }
+
+        // nocommit
+        if (Codec.DEBUG && positions != null) {
+          positions.desc = desc + ":" + doc;
+        }
+
+        if (Codec.DEBUG) {
+          System.out.println("  result doc=" + doc);
+        }
+        return doc;
+      }
+
+      public int read(int[] docs, int[] freqs) throws IOException {
+        if (Codec.DEBUG) {
+          System.out.println("sdr.read: count=" + count + " df=" + docFreq);
+        }
+        int i = 0;
+        final int length = docs.length;
+        while (i < length && count < docFreq) {
+          count++;
+          // manually inlined call to next() for speed
+          final int code = freqIn.readVInt();
+          if (omitTF) {
+            doc += code;
+            freq = 1;
+          } else {
+            doc += code >>> 1;              // shift off low bit
+            if ((code & 1) != 0)            // if low bit is set
+              freq = 1;                     // freq is one
+            else
+              freq = freqIn.readVInt();     // else read freq
+
+            if (positions != null)
+              positions.skip(freq);
+            else
+              proxSkipFreq += freq;
+          }
+
+          if (skipDocs == null || !skipDocs.get(doc)) {
+            docs[i] = doc;
+            freqs[i] = freq;
+            ++i;
+          }
+        }
+        if (Codec.DEBUG) {
+          System.out.println("  return " + i);
+        }
+
+        return i;
+      }
+
+      public int doc() {
+        return doc;
+      }
+
+      public int freq() {
+        return freq;
+      }
+
+      long proxOffset;
+      int proxSkipPayloadLength = -1;
+      int proxSkipFreq;
+      PositionsEnum fakePositions;
+
+      public PositionsEnum positions() throws IOException {
+        if (Codec.DEBUG) {
+          System.out.println("str.positions: create");
+        }
+        if (positions == null) {
+          // Lazy init
+          if (posReader == null) {
+            // TermFreq was omitted from this field during
+            // indexing, which means we pretend termFreq is
+            // always 1 with that 1 occurrence having
+            // position 0
+            if (fakePositions == null)
+              fakePositions = new FormatPostingsFakePositionsEnum();
+            return fakePositions;
+          } else {
+            // TODO: abstraction violation
+            positions = (StandardPositionsReader.TermsDictReader.SegmentPositionsEnum) posReader.positions();
+            if (Codec.DEBUG) {
+              System.out.println("pos skip proxOffset=" + proxOffset + " payloadlen=" + proxSkipPayloadLength + " skipPosCount= " + proxSkipFreq);
+            }
+            positions.skip(proxOffset, proxSkipPayloadLength, proxSkipFreq);
+          }
+        }
+
+        if (Codec.DEBUG) {
+          positions.desc = desc + ":" + doc;
+        }
+
+        positions.catchUp(freq);
+
+        return positions;
+      }
+
+      public int advance(int target) throws IOException {
+
+        // TODO: jump right to next() if target is < X away
+        // from where we are now?
+
+        if (Codec.DEBUG) {
+          System.out.println("dr [" + desc + "]: skip to target=" + target);
+        }
+
+        if (skipOffset > 0) {
+
+          // There are enough docs in the posting to have
+          // skip data
+          if (skipper == null) {
+            // Lazy init
+            skipper = new DefaultSkipListReader((IndexInput) freqIn.clone(), maxSkipLevels, skipInterval);
+          }
+
+          if (!skipped) {
+
+            // We haven't already skipped for this posting,
+            // so now we init the skipper
+
+            // TODO: this is abstraction violation; instead,
+            // skipper should interact with this as a
+            // private consumer
+            skipper.init(freqOffset+skipStart,
+                         freqOffset, proxOffset,
+                         docFreq, fieldInfo.storePayloads);
+
+            if (Codec.DEBUG) {
+              System.out.println("    skip reader base freqFP=" + (freqOffset+skipStart) + " freqFP=" + freqOffset + " proxFP=" + proxOffset);
+            }
+
+            skipped = true;
+          }
+
+          final int newCount = skipper.skipTo(target); 
+
+          if (newCount > count) {
+
+            if (Codec.DEBUG) {
+              System.out.println("dr [" + desc + "]: skipper moved to newCount=" + newCount + " freqFP=" + skipper.getFreqPointer() + " proxFP=" + skipper.getProxPointer() + " doc=" + skipper.getDoc());
+            }
+
+            // Skipper did move
+            freqIn.seek(skipper.getFreqPointer());
+            count = newCount;
+            doc = skipper.getDoc();
+
+            // TODO: abstraction violation; this should be a
+            // private interaction b/w skipper & posReader
+            if (positions != null) {
+              // nocommit -- should that be count?
+              positions.skip(skipper.getProxPointer(), skipper.getPayloadLength(), 0);
+            } else {
+              proxOffset = skipper.getProxPointer();
+              proxSkipPayloadLength = skipper.getPayloadLength();
+              // nocommit -- should that be count?
+              proxSkipFreq = 0;
+            }
+          } else if (Codec.DEBUG) {
+            System.out.println("  no skipping to be done");
+          }
+        } else if (Codec.DEBUG) {
+          System.out.println("  no skip data (#docs is too low)");
+        }
+        
+        // Now, linear scan for the rest:
+        do {
+          if (next() == NO_MORE_DOCS)
+            return NO_MORE_DOCS;
+        } while (target > doc);
+
+        return doc;
+      }
+    }
+  }
+}
+
+/** Returned when someone asks for the positions() enum on a field
+ *  with omitTermFreqAndPositions set */
+class FormatPostingsFakePositionsEnum extends PositionsEnum {
+  @Override
+  public int next() {
+    return 0;
+  }
+  @Override
+  public int getPayloadLength() {
+    return 0;
+  }
+  @Override
+  public boolean hasPayload() {
+    return false;
+  }
+  @Override
+  public byte[] getPayload(byte[] data, int offset) {
+    return null;
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardDocsReader.java
------------------------------------------------------------------------------
    svn:eol-style = native
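
The hot loop in SegmentDocsEnum.next() above decodes the .frq stream's
doc/freq packing: each document's delta is shifted left one bit, and the
low bit is set when freq == 1, so the common case costs a single VInt.
A self-contained sketch of just that decode (an int[] stands in for the
VInt stream; the real code reads from the cloned freqIn):

    class DocFreqDecodeSketch {
      public static void main(String[] args) {
        // As written for (doc=5, freq=1) then (doc=8, freq=4):
        // deltas 5 and 3 encode to VInts 11, 6, 4.
        int[] stream = { (5 << 1) | 1, 3 << 1, 4 };
        int doc = 0, i = 0;
        while (i < stream.length) {
          final int code = stream[i++];
          doc += code >>> 1;                            // shift off low bit
          final int freq = (code & 1) != 0 ? 1 : stream[i++];
          System.out.println("doc=" + doc + " freq=" + freq);
        }
        // prints doc=5 freq=1, then doc=8 freq=4
      }
    }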

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardDocsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardDocsWriter.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardDocsWriter.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardDocsWriter.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,205 @@
+package org.apache.lucene.index.codecs.standard;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Consumes doc & freq, writing them using the current
+ *  index file format */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.index.codecs.DocsConsumer;
+import org.apache.lucene.index.codecs.PositionsConsumer;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.codecs.Codec;
+
+public final class StandardDocsWriter extends DocsConsumer {
+  final static String CODEC = "SingleFileDocFreqSkip";
+  
+  // Increment version to change it:
+  final static int VERSION_START = 0;
+  final static int VERSION_CURRENT = VERSION_START;
+
+  final IndexOutput out;
+  final StandardPositionsWriter posWriter;
+  final DefaultSkipListWriter skipListWriter;
+  final int skipInterval;
+  final int maxSkipLevels;
+  final int totalNumDocs;
+  IndexOutput termsOut;
+
+  boolean omitTermFreqAndPositions;
+  boolean storePayloads;
+  // Starts a new term
+  long lastFreqStart;
+  long freqStart;
+  FieldInfo fieldInfo;
+
+  public StandardDocsWriter(SegmentWriteState state) throws IOException {
+    super();
+    final String fileName = IndexFileNames.segmentFileName(state.segmentName, StandardCodec.FREQ_EXTENSION);
+    state.flushedFiles.add(fileName);
+    out = state.directory.createOutput(fileName);
+    totalNumDocs = state.numDocs;
+
+    // nocommit -- abstraction violation
+    skipListWriter = new DefaultSkipListWriter(state.skipInterval,
+                                               state.maxSkipLevels,
+                                               state.numDocs,
+                                               out,
+                                               null);
+     
+    skipInterval = state.skipInterval;
+    maxSkipLevels = state.maxSkipLevels;
+
+    posWriter = new StandardPositionsWriter(state, this);
+  }
+
+  public void start(IndexOutput termsOut) throws IOException {
+    this.termsOut = termsOut;
+    Codec.writeHeader(termsOut, CODEC, VERSION_CURRENT);
+    termsOut.writeInt(skipInterval);                // write skipInterval
+    termsOut.writeInt(maxSkipLevels);               // write maxSkipLevels
+    posWriter.start(termsOut);
+  }
+
+  public void startTerm() {
+    freqStart = out.getFilePointer();
+    if (!omitTermFreqAndPositions)
+      posWriter.startTerm();
+    skipListWriter.resetSkip();
+  }
+
+  // nocommit -- should we NOT reuse across fields?  would
+  // be cleaner
+
+  // Currently, this instance is re-used across fields, so
+  // our parent calls setField whenever the field changes
+  public void setField(FieldInfo fieldInfo) {
+    this.fieldInfo = fieldInfo;
+    omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
+    storePayloads = fieldInfo.storePayloads;
+    posWriter.setField(fieldInfo);
+  }
+
+  int lastDocID;
+  int df;
+  
+  int count;
+
+  /** Adds a new doc in this term.  If this returns null
+   *  then we just skip consuming positions/payloads. */
+  public PositionsConsumer addDoc(int docID, int termDocFreq) throws IOException {
+
+    final int delta = docID - lastDocID;
+    
+    if (Codec.DEBUG) {
+      System.out.println("  dw.addDoc [" + desc + "] count=" + (count++) + " docID=" + docID + " lastDocID=" + lastDocID + " delta=" + delta + " omitTF=" + omitTermFreqAndPositions + " freq=" + termDocFreq + " freqPointer=" + out.getFilePointer());
+    }
+
+    if (docID < 0 || (df > 0 && delta <= 0)) {
+      throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " )");
+    }
+
+    if ((++df % skipInterval) == 0) {
+      // TODO: abstraction violation
+      skipListWriter.setSkipData(lastDocID, storePayloads, posWriter.lastPayloadLength);
+      skipListWriter.bufferSkip(df);
+      if (Codec.DEBUG) {
+        System.out.println("    bufferSkip lastDocID=" + lastDocID + " df=" + df + " freqFP=" + out.getFilePointer() + " proxFP=" + skipListWriter.proxOutput.getFilePointer());
+      }
+    }
+
+    // nocommit -- move this assert up above; every consumer
+    // shouldn't have to check for this bug:
+    assert docID < totalNumDocs: "docID=" + docID + " totalNumDocs=" + totalNumDocs;
+
+    lastDocID = docID;
+    if (omitTermFreqAndPositions) {
+      out.writeVInt(delta);
+    } else if (1 == termDocFreq) {
+      out.writeVInt((delta<<1) | 1);
+    } else {
+      out.writeVInt(delta<<1);
+      out.writeVInt(termDocFreq);
+    }
+
+    // nocommit
+    if (Codec.DEBUG) {
+      ((StandardPositionsWriter) posWriter).desc = desc + ":" + docID;
+    }
+
+    if (omitTermFreqAndPositions) {
+      return null;
+    } else {
+      return posWriter;
+    }
+  }
+
+  /** Called when we are done adding docs to this term */
+  public void finishTerm(int docCount, boolean isIndexTerm) throws IOException {
+    // nocommit -- wasteful we are counting this in two places?
+    assert docCount == df;
+    // mxx
+    if (Codec.DEBUG) {
+      System.out.println(Thread.currentThread().getName() + ": dw.finishTerm termsOut pointer=" + termsOut.getFilePointer() + " freqStart=" + freqStart + " df=" + df + " isIndex?=" + isIndexTerm);
+    }
+
+    if (isIndexTerm) {
+      // Write absolute at seek points
+      termsOut.writeVLong(freqStart);
+    } else {
+      // Write delta between seek points
+      termsOut.writeVLong(freqStart - lastFreqStart);
+    }
+
+    lastFreqStart = freqStart;
+
+    if (df >= skipInterval) {
+      // mxx
+      if (Codec.DEBUG) {
+        System.out.println(Thread.currentThread().getName() + ":  writeSkip @ freqFP=" + out.getFilePointer() + " freqStartFP=" + freqStart);
+      }
+      termsOut.writeVLong(skipListWriter.writeSkip(out)-freqStart);
+    }
+     
+    if (!omitTermFreqAndPositions) {
+      posWriter.finishTerm(isIndexTerm);
+    }
+
+
+    lastDocID = 0;
+    df = 0;
+    
+    // nocommit
+    count = 0;
+  }
+
+  public void close() throws IOException {
+    if (Codec.DEBUG)
+      System.out.println("docs writer close pointer=" + out.getFilePointer());
+    try {
+      out.close();
+    } finally {
+      posWriter.close();
+    }
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardDocsWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native
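
addDoc above is the encode side of the scheme decoded in
StandardDocsReader: the doc delta is shifted left one bit, the low bit
flags freq == 1, and every skipInterval'th document additionally buffers
a skip entry. A sketch of the delta/freq encoding alone (collecting VInt
values into a list instead of writing the .frq file):

    import java.util.ArrayList;
    import java.util.List;

    class DocFreqEncodeSketch {
      public static void main(String[] args) {
        int[][] postings = { {5, 1}, {8, 4} };    // (docID, termDocFreq)
        List<Integer> out = new ArrayList<Integer>();
        int lastDocID = 0;
        for (int[] p : postings) {
          final int delta = p[0] - lastDocID;
          lastDocID = p[0];
          if (p[1] == 1) {
            out.add((delta << 1) | 1);            // fold freq==1 into low bit
          } else {
            out.add(delta << 1);
            out.add(p[1]);                        // explicit freq
          }
        }
        System.out.println(out);                  // [11, 6, 4]
      }
    }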

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardPositionsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardPositionsReader.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardPositionsReader.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardPositionsReader.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,253 @@
+package org.apache.lucene.index.codecs.standard;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.PositionsEnum;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.PositionsProducer;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+
+// nocommit -- base class should not be named terms dict:
+// this class interacts w/ a docsreader
+public class StandardPositionsReader extends PositionsProducer {
+  
+  final IndexInput proxIn;
+  IndexInput termsIn;
+
+  public StandardPositionsReader(Directory dir, SegmentInfo segmentInfo, int readBufferSize) throws IOException {
+    assert segmentInfo.getHasProx();
+    proxIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, StandardCodec.PROX_EXTENSION), readBufferSize);
+  }
+
+  public void start(IndexInput termsIn) throws IOException {
+    this.termsIn = termsIn;
+
+    Codec.checkHeader(termsIn, StandardPositionsWriter.CODEC, StandardPositionsWriter.VERSION_START);
+  }
+
+  public static void files(SegmentInfo segmentInfo, Collection files) {
+    if (segmentInfo.getHasProx()) {
+      files.add(IndexFileNames.segmentFileName(segmentInfo.name, StandardCodec.PROX_EXTENSION));
+    }
+  }
+
+  public Reader reader(FieldInfo fieldInfo, IndexInput termsIn) {
+    return new TermsDictReader(termsIn, fieldInfo);
+  }
+
+  public void close() throws IOException {
+    if (proxIn != null) {
+      proxIn.close();
+    }
+  }
+
+  class TermsDictReader extends Reader {
+
+    final IndexInput termsIn;
+    final FieldInfo fieldInfo;
+    long proxOffset;
+
+    TermsDictReader(IndexInput termsIn, FieldInfo fieldInfo) {
+      this.termsIn = termsIn;
+      this.fieldInfo = fieldInfo;
+    }
+
+    public void readTerm(int docFreq, boolean isIndexTerm) throws IOException {
+      // mxx
+      if (Codec.DEBUG) {
+        System.out.println("    pr.readterm termsInPointer=" + termsIn.getFilePointer() + " isIndex=" + isIndexTerm);
+      }
+
+      if (isIndexTerm) {
+        proxOffset = termsIn.readVLong();
+      } else {
+        proxOffset += termsIn.readVLong();
+      }
+
+      // mxx
+      if (Codec.DEBUG) {
+        System.out.println("      proxOffset=" + proxOffset);
+      }
+
+      if (positions != null) {
+        positions.seekPending = true;
+        positions.skipOffset = proxOffset;
+        positions.skipPosCount = 0;
+      }
+    }
+
+    SegmentPositionsEnum positions;
+
+    public PositionsEnum positions() throws IOException {
+
+      if (positions == null)
+        // Lazy init
+        positions = new SegmentPositionsEnum();
+
+      return positions;
+    }
+
+    // nocommit -- should we have different reader for
+    // payload vs no payload?
+    class SegmentPositionsEnum extends PositionsEnum {
+
+      // nocommit
+      String desc;
+
+      final IndexInput proxIn;
+
+      final boolean storePayloads;
+
+      boolean seekPending;                        // True if we must seek before reading next position
+      boolean payloadPending;                     // True if we must skip payload before reading next position
+
+      long skipOffset;
+      int skipPosCount;
+
+      int position;
+      int payloadLength;
+
+      SegmentPositionsEnum() {
+        if (Codec.DEBUG) {
+          System.out.println("new pos enum");
+        }
+        proxIn = (IndexInput) StandardPositionsReader.this.proxIn.clone();
+        storePayloads = fieldInfo.storePayloads;
+      }
+
+      void skip(long proxOffset, int lastPayloadLength, int numPositions) {
+        skipOffset = proxOffset;
+        payloadLength = lastPayloadLength;
+        assert payloadLength >= 0 || payloadLength == -1;
+        skipPosCount = numPositions;
+        seekPending = true;
+        payloadPending = false;
+        if (Codec.DEBUG) {
+          System.out.println("pr [" + desc + "] skip fp= " + proxOffset + " numPositions=" + numPositions);
+        }
+      }
+
+      void skip(int numPositions) {
+        skipPosCount += numPositions;
+        if (Codec.DEBUG)
+          System.out.println("pr [" + desc + "] skip " + numPositions + " positions; now " + skipPosCount);
+      }
+
+      void catchUp(int currentCount) throws IOException { 
+        if (Codec.DEBUG) {
+          System.out.println("  pos catchup: seekPending=" + seekPending + " skipOffset=" + skipOffset + " skipPosCount " + skipPosCount + " vs currentCount " + currentCount + " payloadLen=" + payloadLength);
+        }
+
+        if (seekPending) {
+          proxIn.seek(skipOffset);
+          seekPending = false;
+        }
+
+        while(skipPosCount > currentCount) {
+          next();
+        }
+        if (Codec.DEBUG) {
+          System.out.println("  pos catchup done");
+        }
+        positions.init();
+      }
+
+      void init() {
+        if (Codec.DEBUG) {
+          System.out.println("  pos init");
+        }
+        position = 0;
+      }
+
+      public int next() throws IOException {
+
+        if (Codec.DEBUG)
+          System.out.println("    pr.next [" + desc + "]: fp=" + proxIn.getFilePointer() + " return pos=" + position);
+
+        if (storePayloads) {
+
+          if (payloadPending && payloadLength > 0) {
+            if (Codec.DEBUG)
+              System.out.println("      payload pending: skip " + payloadLength + " bytes");
+            proxIn.seek(proxIn.getFilePointer()+payloadLength);
+          }
+
+          final int code = proxIn.readVInt();
+          if ((code & 1) != 0) {
+            // Payload length has changed
+            payloadLength = proxIn.readVInt();
+            assert payloadLength >= 0;
+            if (Codec.DEBUG)
+              System.out.println("      new payloadLen=" + payloadLength);
+          }
+          assert payloadLength != -1;
+          
+          payloadPending = true;
+          position += code >>> 1;
+        } else
+          position += proxIn.readVInt();
+
+        skipPosCount--;
+
+        // NOTE: the old API actually allowed this...
+        assert skipPosCount >= 0: "next() was called too many times (more than FormatPostingsDocsEnum.freq() times)";
+
+        if (Codec.DEBUG)
+          System.out.println("   proxFP=" + proxIn.getFilePointer() + " return pos=" + position);
+        return position;
+      }
+
+      public int getPayloadLength() {
+        return payloadLength;
+      }
+
+      public byte[] getPayload(byte[] data, int offset) throws IOException {
+
+        if (!payloadPending)
+          throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
+
+        final byte[] retArray;
+        final int retOffset;
+        if (data == null || data.length-offset < payloadLength) {
+          // the array is too small to store the payload data,
+          // so we allocate a new one
+          retArray = new byte[payloadLength];
+          retOffset = 0;
+        } else {
+          retArray = data;
+          retOffset = offset;
+        }
+
+        proxIn.readBytes(retArray, retOffset, payloadLength);
+        payloadPending = false;
+        return retArray;
+      }
+      
+      public boolean hasPayload() {
+        return payloadPending && payloadLength > 0;
+      }
+    }
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardPositionsReader.java
------------------------------------------------------------------------------
    svn:eol-style = native
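
SegmentPositionsEnum.next() above applies the same low-bit trick to the
.prx stream when payloads are stored: the position delta is shifted left
one bit, a set low bit means a new payload length VInt follows, and the
payload bytes themselves sit between entries and are skipped lazily. A
worked decode over a fixed stream (payload bytes elided; the array stands
in for proxIn):

    class ProxDecodeSketch {
      public static void main(String[] args) {
        // As written for positions 3, 7, 12 with payload lengths 2, 2, 5:
        // (3<<1)|1, 2, 4<<1, (5<<1)|1, 5
        int[] stream = { 7, 2, 8, 11, 5 };
        int position = 0, payloadLength = -1, i = 0;
        while (i < stream.length) {
          final int code = stream[i++];
          if ((code & 1) != 0) {
            payloadLength = stream[i++];   // payload length has changed
          }
          position += code >>> 1;
          System.out.println("pos=" + position + " payloadLen=" + payloadLength);
        }
        // prints pos=3 len=2, pos=7 len=2, pos=12 len=5
      }
    }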

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardPositionsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardPositionsWriter.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardPositionsWriter.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardPositionsWriter.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,151 @@
+package org.apache.lucene.index.codecs.standard;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.PositionsConsumer;
+import org.apache.lucene.store.IndexOutput;
+
+final class StandardPositionsWriter extends PositionsConsumer {
+  final static String CODEC = "SingleFilePositionsPayloads";
+
+  // Increment version to change it:
+  final static int VERSION_START = 0;
+  final static int VERSION_CURRENT = VERSION_START;
+  
+  final StandardDocsWriter parent;
+  final IndexOutput out;
+  
+  IndexOutput termsOut;
+
+  boolean omitTermFreqAndPositions;
+  boolean storePayloads;
+  int lastPayloadLength = -1;
+
+  // nocommit
+  String desc;
+  
+  StandardPositionsWriter(SegmentWriteState state, StandardDocsWriter parent) throws IOException {
+    this.parent = parent;
+    omitTermFreqAndPositions = parent.omitTermFreqAndPositions;
+    if (state.fieldInfos.hasProx()) {
+      // At least one field does not omit TF, so create the
+      // prox file
+      final String fileName = IndexFileNames.segmentFileName(state.segmentName, StandardCodec.PROX_EXTENSION);
+      state.flushedFiles.add(fileName);
+      out = state.directory.createOutput(fileName);
+      parent.skipListWriter.setProxOutput(out);
+    } else
+      // Every field omits TF so we will write no prox file
+      out = null;
+  }
+
+  public void start(IndexOutput termsOut) throws IOException {
+    this.termsOut = termsOut;
+    Codec.writeHeader(termsOut, CODEC, VERSION_CURRENT);
+  }
+
+  long proxStart;
+  long lastProxStart;
+
+  public void startTerm() {
+    proxStart = out.getFilePointer();
+    lastPayloadLength = -1;
+  }
+
+  
+  int lastPosition;
+
+  /** Add a new position & payload */
+  public void addPosition(int position, byte[] payload, int payloadOffset, int payloadLength) throws IOException {
+    assert !omitTermFreqAndPositions: "omitTermFreqAndPositions is true";
+    assert out != null;
+
+    if (Codec.DEBUG) {
+      if (payload != null)
+        System.out.println("pw.addPos [" + desc + "]: pos=" + position + " fp=" + out.getFilePointer() + " payload=" + payloadLength + " bytes");
+      else
+        System.out.println("pw.addPos [" + desc + "]: pos=" + position + " fp=" + out.getFilePointer());
+    }
+    
+    final int delta = position - lastPosition;
+    
+    assert delta > 0 || position == 0 || position == -1: "position=" + position + " lastPosition=" + lastPosition;            // not quite right (if pos=0 is repeated twice we don't catch it)
+
+    lastPosition = position;
+
+    if (storePayloads) {
+      if (Codec.DEBUG) {
+        System.out.println("  store payloads");
+      }
+
+      if (payloadLength != lastPayloadLength) {
+        if (Codec.DEBUG) {
+          System.out.println("  payload len change old=" + lastPayloadLength + " new=" + payloadLength);
+        }
+
+        lastPayloadLength = payloadLength;
+        out.writeVInt((delta<<1)|1);
+        out.writeVInt(payloadLength);
+      } else
+        out.writeVInt(delta << 1);
+      if (payloadLength > 0)
+        out.writeBytes(payload, payloadLength);
+    } else
+      out.writeVInt(delta);
+  }
+
+  void setField(FieldInfo fieldInfo) {
+    omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
+    storePayloads = omitTermFreqAndPositions ? false : fieldInfo.storePayloads;
+  }
+
+  /** Called when we are done adding positions & payloads */
+  public void finishDoc() {       
+    lastPosition = 0;
+  }
+
+  public void finishTerm(boolean isIndexTerm) throws IOException {
+    assert !omitTermFreqAndPositions;
+
+    // mxx
+    if (Codec.DEBUG) {
+      System.out.println("poswriter finishTerm isIndex=" + isIndexTerm + " proxStart=" + proxStart + " pointer=" + termsOut.getFilePointer());
+    }
+
+    if (isIndexTerm) {
+      // Write absolute at seek points
+      termsOut.writeVLong(proxStart);
+    } else {
+      termsOut.writeVLong(proxStart-lastProxStart);
+    }
+
+    lastProxStart = proxStart;
+  }
+
+  public void close() throws IOException {
+    if (out != null) {
+      out.close();
+    }
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardPositionsWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native
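
finishTerm here mirrors StandardDocsWriter.finishTerm: at an index term
the current file pointer is written as an absolute VLong (a seek target),
while every other term writes only the delta from the previous term's
pointer; the readers' readTerm methods invert this. A round-trip sketch
using plain longs in place of VLongs:

    class TermPointerSketch {
      public static void main(String[] args) {
        long[] starts = { 100, 140, 190, 400 };
        boolean[] isIndexTerm = { true, false, false, true };

        // Encode: absolute at index terms, delta otherwise.
        long[] encoded = new long[starts.length];
        long last = 0;
        for (int i = 0; i < starts.length; i++) {
          encoded[i] = isIndexTerm[i] ? starts[i] : starts[i] - last;
          last = starts[i];
        }
        // encoded = {100, 40, 50, 400}

        // Decode, as readTerm does on the reader side.
        long offset = 0;
        for (int i = 0; i < encoded.length; i++) {
          offset = isIndexTerm[i] ? encoded[i] : offset + encoded[i];
          System.out.println("term " + i + " starts at " + offset);
        }
      }
    }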

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,496 @@
+package org.apache.lucene.index.codecs.standard;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.FieldsEnum;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.TermRef;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.DocsProducer;
+import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.codecs.DocsProducer.Reader.State;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.CloseableThreadLocal;
+
+/** Handles a terms dict, but defers all details of postings
+ *  reading to an instance of {@link TermsDictDocsReader}. This
+ *  terms dict codec is meant to be shared between
+ *  different postings codecs, but it's certainly possible
+ *  to make a codec that has its own terms dict writer/reader. */
+
+public class StandardTermsDictReader extends FieldsProducer {
+  private final IndexInput in;
+
+  private final DocsProducer docs;
+
+  final TreeMap<String,FieldReader> fields = new TreeMap<String,FieldReader>();
+
+  private final String segment;
+  private StandardTermsIndexReader indexReader;
+
+
+  public StandardTermsDictReader(StandardTermsIndexReader indexReader, Directory dir, FieldInfos fieldInfos, String segment, DocsProducer docs, int readBufferSize)
+    throws IOException {
+
+    in = dir.openInput(IndexFileNames.segmentFileName(segment, StandardCodec.TERMS_EXTENSION), readBufferSize);
+    this.segment = segment;
+
+    boolean success = false;
+    try {
+      Codec.checkHeader(in, StandardTermsDictWriter.CODEC_NAME, StandardTermsDictWriter.VERSION_CURRENT);
+
+      final long dirOffset = in.readLong();
+
+      this.docs = docs;
+      // Have DocsProducer init itself
+      docs.start(in);
+
+      // Read per-field details
+      in.seek(dirOffset);
+
+      final int numFields = in.readInt();
+
+      // mxx
+      if (Codec.DEBUG) {
+        System.out.println(Thread.currentThread().getName() + ": stdr create seg=" + segment + " numFields=" + numFields + " hasProx?=" + fieldInfos.hasProx());
+      }
+
+      for(int i=0;i<numFields;i++) {
+        final int field = in.readInt();
+        final long numTerms = in.readLong();
+        final long termsStartPointer = in.readLong();
+        final StandardTermsIndexReader.FieldReader fieldIndexReader;
+        final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+        if (Codec.DEBUG) {
+          System.out.println("  stdr: load field=" + fieldInfo.name + " numTerms=" + numTerms);
+        }
+        if (indexReader != null) {
+          fieldIndexReader = indexReader.getField(fieldInfo);
+        } else {
+          fieldIndexReader = null;
+        }
+        if (numTerms > 0) {
+          fields.put(fieldInfo.name, new FieldReader(fieldIndexReader, fieldInfo, numTerms, termsStartPointer));
+        }
+      }
+      success = true;
+    } finally {
+      if (!success) {
+        in.close();
+      }
+    }
+
+    this.indexReader = indexReader;
+  }
+
+  public void loadTermsIndex() throws IOException {
+    indexReader.loadTermsIndex();
+  }
+
+  public void close() throws IOException {
+    try {
+      try {
+        indexReader.close();
+      } finally {
+        in.close();
+      }
+    } finally {
+      try {
+        docs.close();
+      } finally {
+        for(FieldReader field : fields.values()) {
+          field.close();
+        }
+      }
+    }
+  }
+
+  public static void files(SegmentInfo segmentInfo, Collection files) {
+    files.add(IndexFileNames.segmentFileName(segmentInfo.name, StandardCodec.TERMS_EXTENSION));
+  }
+
+  public static void getExtensions(Collection extensions) {
+    extensions.add(StandardCodec.TERMS_EXTENSION);
+  }
+
+  @Override
+  public FieldsEnum iterator() {
+    return new TermFieldsEnum();
+  }
+
+  public Terms terms(String field) throws IOException {
+    if (Codec.DEBUG) {
+      System.out.println("stdr.terms field=" + field + " found=" + fields.get(field));
+    }
+    return fields.get(field);
+  }
+
+  // Iterates through all known fields
+  private class TermFieldsEnum extends FieldsEnum {
+    final Iterator it;
+    FieldReader current;
+
+    TermFieldsEnum() {
+      it = fields.values().iterator();
+    }
+
+    public String next() {
+      if (Codec.DEBUG) {
+        System.out.println("stdr.tfe.next seg=" + segment);
+        //new Throwable().printStackTrace(System.out);
+      }
+      if (it.hasNext()) {
+        current = (FieldReader) it.next();
+        if (Codec.DEBUG) {
+          System.out.println("  hasNext set current field=" + current.fieldInfo.name);
+        }
+        return current.fieldInfo.name;
+      } else {
+        current = null;
+        return null;
+      }
+    }
+    
+    public TermsEnum terms() throws IOException {
+      return current.iterator();
+    }
+  }
+  
+  private class FieldReader extends Terms {
+    private final CloseableThreadLocal threadResources = new CloseableThreadLocal();
+    // nocommit: check placement
+
+    final long numTerms;
+    final FieldInfo fieldInfo;
+    final long termsStartPointer;
+    final StandardTermsIndexReader.FieldReader indexReader;
+
+    FieldReader(StandardTermsIndexReader.FieldReader fieldIndexReader, FieldInfo fieldInfo, long numTerms, long termsStartPointer) {
+      assert numTerms > 0;
+      this.fieldInfo = fieldInfo;
+      this.numTerms = numTerms;
+      this.termsStartPointer = termsStartPointer;
+      this.indexReader = fieldIndexReader;
+    }
+
+    public void close() {
+      threadResources.close();
+    }
+    
+    private ThreadResources getThreadResources() {
+      ThreadResources resources = (ThreadResources)threadResources.get();
+      if (resources == null) {
+        resources = new ThreadResources();
+        // Cache does not have to be thread-safe, it is only used by one thread at the same time
+        resources.termInfoCache = new ReuseLRUCache(1024);
+        threadResources.set(resources);
+      }
+      return resources;
+    }
+    
+    public TermsEnum iterator() throws IOException {
+      return new SegmentTermsEnum();
+    }
+
+    public long getUniqueTermCount() {
+      return numTerms;
+    }
+
+    // Iterates through terms in this field
+    private class SegmentTermsEnum extends TermsEnum {
+      private final IndexInput in;
+      private final DeltaBytesReader bytesReader;
+      // nocommit: long?
+      private int termUpto;
+      private final DocsProducer.Reader docs;
+      private int docFreq;
+      private final StandardTermsIndexReader.TermsIndexResult indexResult = new StandardTermsIndexReader.TermsIndexResult();
+      ThreadResources resources = getThreadResources();
+      
+      SegmentTermsEnum() throws IOException {
+        if (Codec.DEBUG) {
+          System.out.println("tdr " + this + ": CREATE TermsEnum field=" + fieldInfo.name + " startPos=" + termsStartPointer + " seg=" + segment);
+        }
+        in = (IndexInput) StandardTermsDictReader.this.in.clone();
+        in.seek(termsStartPointer);
+        bytesReader = new DeltaBytesReader(in);
+        if (Codec.DEBUG) {
+          System.out.println("  bytesReader=" + bytesReader);
+        }
+        docs = StandardTermsDictReader.this.docs.reader(fieldInfo, in);
+      }
+
+      /** Seeks until the first term that's >= the provided
+       *  text; returns SeekStatus.FOUND if the exact term
+       *  is found, SeekStatus.NOT_FOUND if a different term
+       *  was found, SeekStatus.END if we hit EOF */
+      public SeekStatus seek(TermRef term) throws IOException {
+        ReuseLRUCache cache = null;
+        CacheEntry entry = null;
+
+        if (docs.canCaptureState()) {
+          cache = resources.termInfoCache;
+
+          entry = (CacheEntry) cache.get(term);
+          if (entry != null) {
+            docFreq = entry.freq;
+            bytesReader.term = (TermRef) entry.term.clone();
+            docs.setState(entry.state);
+            termUpto = entry.termUpTo;
+
+            return SeekStatus.FOUND;
+          } 
+        }
+        
+        // mxx
+        if (Codec.DEBUG) {
+          System.out.println(Thread.currentThread().getName() + ":stdr.seek(text=" + fieldInfo.name + ":" + term + ") seg=" + segment);
+        }
+
+        if (bytesReader.started && termUpto < numTerms && bytesReader.term.compareTerm(term) == 0) {
+          // nocommit -- not right if text is ""?
+          // mxx
+          if (Codec.DEBUG) {
+            System.out.println(Thread.currentThread().getName() + ":  already here!");
+          }
+          return SeekStatus.FOUND;
+        }
+
+        // Find latest index term that's <= our text:
+        indexReader.getIndexOffset(term, indexResult);
+
+        // mxx
+        if (Codec.DEBUG) {
+          System.out.println(Thread.currentThread().getName() + ":  index pos=" + indexResult.position + " termFP=" + indexResult.offset + " term=" + indexResult.term + " this=" + this);
+        }
+
+        in.seek(indexResult.offset);
+
+        // NOTE: the first next() after an index seek is
+        // wasteful, since it redundantly reads the same
+        // bytes into the buffer
+        bytesReader.reset(indexResult.term);
+
+        termUpto = indexResult.position;
+        assert termUpto>=0: "termUpto=" + termUpto;
+
+        // mxx
+        if (Codec.DEBUG) {
+          System.out.println(Thread.currentThread().getName() + ":  set termUpto=" + termUpto);
+        }
+
+        // Now, scan:
+
+        //int indexCount = 0;
+        //int lastIndexCount = 0;
+        int scanCnt = 0;
+        while(next() != null) {
+          scanCnt++;
+          final int cmp = bytesReader.term.compareTerm(term);
+          if (cmp == 0) {
+            // mxx
+            if (Codec.DEBUG) {
+              System.out.println(Thread.currentThread().getName() + ":  seek done found term=" + bytesReader.term);
+              //new Throwable().printStackTrace(System.out);
+            }
+        
+            if(docs.canCaptureState() && scanCnt > 1) {
+              if(cache.eldest != null) {
+                entry = (CacheEntry) cache.eldest;
+                cache.eldest = null;
+                entry.state = docs.captureState(entry.state);
+              } else {
+                entry = new CacheEntry();
+                entry.state = docs.captureState(null);
+              }
+              entry.freq = docFreq;
+              entry.termUpTo = termUpto;
+
+              entry.term = (TermRef) bytesReader.term.clone();
+
+              cache.put(entry.term, entry);
+            }
+            return SeekStatus.FOUND;
+          } else if (cmp > 0) {
+            // mxx
+            if (Codec.DEBUG) {
+              System.out.println(Thread.currentThread().getName() + ":  seek done did not find term=" + term + " found instead: " + bytesReader.term);
+            }
+            return SeekStatus.NOT_FOUND;
+          }
+
+          // We should not cross another indexed term while
+          // scanning:
+
+          // nocommit -- not correct that we call
+          // isIndexTerm, twice
+          //indexCount += indexReader.isIndexTerm(termUpto, docFreq) ? 1:0;
+          //assert lastIndexCount < indexDivisor: " indexCount=" + lastIndexCount + " indexDivisor=" + indexDivisor;
+          //lastIndexCount = indexCount;
+
+          // mxx
+          //System.out.println(Thread.currentThread().getName() + ":  cycle");
+        }
+
+        // mxx
+        if (Codec.DEBUG) {
+          System.out.println(Thread.currentThread().getName() + ": seek done did not find term=" + term + ": hit EOF");
+        }
+        return SeekStatus.END;
+      }
+
+      public SeekStatus seek(long pos) throws IOException {
+        if (pos >= numTerms) {
+          return SeekStatus.END;
+        }
+        indexReader.getIndexOffset(pos, indexResult);
+        in.seek(indexResult.offset);
+
+        // NOTE: the first next() after an index seek is
+        // wasteful, since it redundantly reads the same
+        // bytes into the buffer
+        bytesReader.reset(indexResult.term);
+
+        termUpto = indexResult.position;
+        assert termUpto>=0: "termUpto=" + termUpto;
+
+        // Now, scan:
+        int left = (int) (1 + pos - termUpto);
+        while(left > 0) {
+          TermRef term = next();
+          assert term != null;
+          left--;
+        }
+
+        // always found
+        return SeekStatus.FOUND;
+      }
+
+      public TermRef term() {
+        return bytesReader.term;
+      }
+
+      public long ord() {
+        return termUpto;
+      }
+
+      public TermRef next() throws IOException {
+        if (termUpto >= numTerms) {
+          return null;
+        }
+        if (Codec.DEBUG) {
+          System.out.println("tdr.next: field=" + fieldInfo.name + " termsInPointer=" + in.getFilePointer() + " vs len=" + in.length() + " seg=" + segment);
+          //new Throwable().printStackTrace(System.out);
+        }
+        bytesReader.read();
+        docFreq = in.readVInt();
+        if (Codec.DEBUG) {
+          System.out.println("  text=" + bytesReader.term + " freq=" + docFreq);
+        }
+        // TODO: would be cleaner, but space-wasting, to
+        // simply record a bit in each term entry as to
+        // whether it's an index term or not... or,
+        // possibly store a "how many terms until next index
+        // entry" in each index entry, but that'd require
+        // some tricky lookahead work when writing the index
+        final boolean isIndex = indexReader.isIndexTerm(termUpto, docFreq);
+
+        // mxx
+        // System.out.println(Thread.currentThread().getName() + ": isIndex=" + isIndex);
+
+        docs.readTerm(docFreq, isIndex);
+        termUpto++;
+        if (Codec.DEBUG) {
+          System.out.println("  termUpto=" + termUpto + " vs numTerms=" + numTerms + " fp=" + in.getFilePointer());
+        }
+        return bytesReader.term;
+      }
+
+      public int docFreq() {
+        return docFreq;
+      }
+
+      public DocsEnum docs(Bits skipDocs) throws IOException {
+        // nocommit
+        if (Codec.DEBUG) {
+          System.out.println("stdr.docs");
+        }
+        DocsEnum docsEnum = docs.docs(skipDocs);
+        if (Codec.DEBUG) {
+          docsEnum.desc = fieldInfo.name + ":" + bytesReader.term;
+        }
+        return docsEnum;
+      }
+    }
+  }
+
+  private class CacheEntry {
+    int termUpTo;
+    int freq;
+    State state;
+    TermRef term;
+  }
+  
+  /**
+   * Per-thread resources managed by ThreadLocal
+   */
+  private final class ThreadResources {
+    // Caches the most recently looked-up terms; least-recently-used entries are evicted first
+    ReuseLRUCache termInfoCache;
+  }
+  
+  private class ReuseLRUCache extends LinkedHashMap {
+    
+    private final static float LOADFACTOR = 0.75f;
+    private int cacheSize;
+    Object eldest;
+
+    /**
+     * Creates a least-recently-used (LRU) cache with the specified size.
+     */
+    public ReuseLRUCache(int cacheSize) {
+      super((int) Math.ceil(cacheSize / LOADFACTOR) + 1, LOADFACTOR, true);
+      this.cacheSize = cacheSize;
+    }
+    
+    protected boolean removeEldestEntry(Map.Entry eldest) {
+      boolean remove = size() > ReuseLRUCache.this.cacheSize;
+      if(remove) {
+        this.eldest = eldest.getValue();
+      } 
+      return remove;
+    }
+    
+  }
+
+}
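
The getThreadResources method above lazily creates per-thread state (here, the per-thread term-info cache) so that concurrent readers never contend on a shared cache. A self-contained sketch of that pattern, assuming a plain JDK ThreadLocal; the commit's threadResources field also supports close(), which suggests Lucene's CloseableThreadLocal, but that detail is left out so the sketch runs standalone:

    import java.util.HashMap;
    import java.util.Map;

    public class PerThreadResourcesDemo {
      // One cache per thread: lookups never need locking.
      private static final ThreadLocal<Map<String, Integer>> PER_THREAD =
          new ThreadLocal<Map<String, Integer>>();

      static Map<String, Integer> getThreadResources() {
        Map<String, Integer> cache = PER_THREAD.get();
        if (cache == null) {
          cache = new HashMap<String, Integer>();  // created lazily on first use
          PER_THREAD.set(cache);
        }
        return cache;
      }

      public static void main(String[] args) {
        getThreadResources().put("term", 1);
        System.out.println(getThreadResources().get("term"));  // prints 1
      }
    }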

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
------------------------------------------------------------------------------
    svn:eol-style = native
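
ReuseLRUCache above leans on LinkedHashMap's access-order mode: removeEldestEntry signals eviction and also parks the evicted value in the eldest field, so the caller (seek(), when it caches a term's state) can recycle that value's buffers instead of allocating fresh ones. A minimal standalone sketch of the same trick; the class and field names here are illustrative, not part of the commit:

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class ReusingCache<K, V> extends LinkedHashMap<K, V> {
      private final int maxSize;
      V evicted;  // value just pushed out, offered back to the caller for reuse

      public ReusingCache(int maxSize) {
        // accessOrder=true: iteration order runs least- to most-recently used
        super((int) Math.ceil(maxSize / 0.75f) + 1, 0.75f, true);
        this.maxSize = maxSize;
      }

      @Override
      protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
        final boolean remove = size() > maxSize;
        if (remove) {
          evicted = eldest.getValue();
        }
        return remove;
      }

      public static void main(String[] args) {
        ReusingCache<String, int[]> cache = new ReusingCache<String, int[]>(2);
        cache.put("a", new int[16]);
        cache.put("b", new int[16]);
        cache.put("c", new int[16]);                // evicts "a", parks its value
        System.out.println(cache.evicted != null);  // true: buffer ready for recycling
      }
    }

In seek() above this shows up as the cache.eldest != null branch: the evicted CacheEntry's captured postings state is handed back into docs.captureState(entry.state) rather than freshly allocated.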

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,221 @@
+package org.apache.lucene.index.codecs.standard;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.TermRef;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.DocsConsumer;
+import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.codecs.TermsConsumer;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.UnicodeUtil;
+
+/**
+ * Writes terms dict and interacts with docs/positions
+ * consumers to write the postings files.
+ *
+ * The [new] terms dict format is field-centric: each field
+ * has its own section in the file.  Fields are written in
+ * UTF16 string comparison order.  Within each field, each
+ * term's text is written in UTF16 string comparison order.
+ */
+
+public class StandardTermsDictWriter extends FieldsConsumer {
+
+  final static String CODEC_NAME = "STANDARD_TERMS_DICT";
+
+  // Initial format
+  public static final int VERSION_START = 0;
+
+  public static final int VERSION_CURRENT = VERSION_START;
+
+  private final DeltaBytesWriter termWriter;
+
+  final IndexOutput out;
+  final DocsConsumer consumer;
+  final FieldInfos fieldInfos;
+  FieldInfo currentField;
+  private final StandardTermsIndexWriter indexWriter;
+  private final List<TermsConsumer> fields = new ArrayList<TermsConsumer>();
+
+  // nocommit
+  private String segment;
+
+  public StandardTermsDictWriter(StandardTermsIndexWriter indexWriter, SegmentWriteState state, DocsConsumer consumer) throws IOException {
+    final String termsFileName = IndexFileNames.segmentFileName(state.segmentName, StandardCodec.TERMS_EXTENSION);
+    this.indexWriter = indexWriter;
+    out = state.directory.createOutput(termsFileName);
+    indexWriter.setTermsOutput(out);
+    state.flushedFiles.add(termsFileName);
+    this.segment = state.segmentName;
+
+    if (Codec.DEBUG) {
+      System.out.println("stdw: write to segment=" + state.segmentName);
+    }
+
+    fieldInfos = state.fieldInfos;
+
+    // Count indexed fields up front
+    final int numFields = fieldInfos.size();
+    Codec.writeHeader(out, CODEC_NAME, VERSION_CURRENT); 
+
+    out.writeLong(0);                             // leave space for end index pointer
+
+    termWriter = new DeltaBytesWriter(out);
+    currentField = null;
+    this.consumer = consumer;
+
+    consumer.start(out);                          // have consumer write its format/header
+  }
+
+  public TermsConsumer addField(FieldInfo field) {
+    if (Codec.DEBUG) {
+      System.out.println("stdw.addField: field=" + field.name);
+    }
+    assert currentField == null || currentField.name.compareTo(field.name) < 0;
+    currentField = field;
+    StandardTermsIndexWriter.FieldWriter fieldIndexWriter = indexWriter.addField(field);
+    TermsConsumer terms = new TermsWriter(fieldIndexWriter, field, consumer);
+    fields.add(terms);
+    return terms;
+  }
+  
+  public void close() throws IOException {
+
+    if (Codec.DEBUG)
+      System.out.println("stdw.close seg=" + segment);
+
+    try {
+      final int fieldCount = fields.size();
+
+      if (Codec.DEBUG)
+        System.out.println("  numFields=" + fieldCount);
+
+      final long dirStart = out.getFilePointer();
+
+      out.writeInt(fieldCount);
+      for(int i=0;i<fieldCount;i++) {
+        TermsWriter field = (TermsWriter) fields.get(i);
+        out.writeInt(field.fieldInfo.number);
+        out.writeLong(field.numTerms);
+        out.writeLong(field.termsStartPointer);
+        if (Codec.DEBUG)
+          System.out.println("stdw.close: field=" + field.fieldInfo.name + " numTerms=" + field.numTerms + " tis pointer=" + field.termsStartPointer);
+      }
+      out.seek(Codec.headerSize(CODEC_NAME));
+      out.writeLong(dirStart);
+    } finally {
+      try {
+        out.close();
+      } finally {
+        try {
+          consumer.close();
+        } finally {
+          indexWriter.close();
+        }
+      }
+    }
+  }
+
+  private final UnicodeUtil.UTF8Result utf8 = new UnicodeUtil.UTF8Result();
+
+  long lastIndexPointer;
+
+  class TermsWriter extends TermsConsumer {
+    final FieldInfo fieldInfo;
+    final DocsConsumer consumer;
+    final long termsStartPointer;
+    int numTerms;
+    final StandardTermsIndexWriter.FieldWriter fieldIndexWriter;
+
+    TermsWriter(StandardTermsIndexWriter.FieldWriter fieldIndexWriter, FieldInfo fieldInfo, DocsConsumer consumer) {
+      this.fieldInfo = fieldInfo;
+      this.consumer = consumer;
+      this.fieldIndexWriter = fieldIndexWriter;
+
+      termWriter.reset();
+      termsStartPointer = out.getFilePointer();
+      consumer.setField(fieldInfo);
+      lastIndexPointer = termsStartPointer;
+
+      if (Codec.DEBUG) {
+        System.out.println("stdw: now write field=" + fieldInfo.name);
+      }
+    }
+    
+    public DocsConsumer startTerm(char[] text, int start) throws IOException {
+      consumer.startTerm();
+      if (Codec.DEBUG) {
+        // nocommit
+        int len = 0;
+        while(text[start+len] != 0xffff) {
+          len++;
+        }
+        consumer.desc = fieldInfo.name + ":" + new String(text, start, len);
+        System.out.println("stdw.startTerm term=" + fieldInfo.name + ":" + new String(text, start, len) + " seg=" + segment);
+      }
+      return consumer;
+    }
+
+    public void finishTerm(char[] text, int start, int numDocs) throws IOException {
+
+      // mxx
+      if (Codec.DEBUG) {
+        // nocommit
+        int len = 0;
+        while(text[start+len] != 0xffff) {
+          len++;
+        }
+        System.out.println(Thread.currentThread().getName() + ": stdw.finishTerm seg=" + segment + " text=" + fieldInfo.name + ":" + new String(text, start, len) + " numDocs=" + numDocs + " numTerms=" + numTerms);
+      }
+
+      if (numDocs > 0) {
+        // TODO: we could do this incrementally
+        UnicodeUtil.UTF16toUTF8(text, start, utf8);
+
+        final boolean isIndexTerm = fieldIndexWriter.checkIndexTerm(utf8.result, utf8.length, numDocs);
+
+        // mxx
+        if (Codec.DEBUG) {
+          System.out.println(Thread.currentThread().getName() + ":  filePointer=" + out.getFilePointer() + " isIndexTerm?=" + isIndexTerm);
+          TermRef tr = new TermRef();
+          tr.bytes = utf8.result;
+          tr.length = utf8.length;
+          System.out.println("  term bytes=" + tr.toBytesString());
+        }
+        termWriter.write(utf8.result, utf8.length);
+        out.writeVInt(numDocs);
+
+        consumer.finishTerm(numDocs, isIndexTerm);
+        numTerms++;
+      }
+    }
+
+    // Finishes all terms in this field
+    public void finish() {
+    }
+  }
+}
\ No newline at end of file

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native
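
The writer's constructor reserves a slot for the field-directory pointer (out.writeLong(0) right after the header) and close() later seeks back and patches it once the directory's final position is known. A runnable sketch of that reserve-then-patch pattern using RandomAccessFile; the file name and payload are made up for the demo, and Lucene's IndexOutput offers the equivalent seek:

    import java.io.IOException;
    import java.io.RandomAccessFile;

    public class PatchBackDemo {
      public static void main(String[] args) throws IOException {
        RandomAccessFile out = new RandomAccessFile("terms.demo", "rw");
        try {
          out.writeInt(0xCAFEBABE);          // stand-in for the codec header
          long dirPointerPos = out.getFilePointer();
          out.writeLong(0);                  // reserve space for the dir pointer
          out.writeUTF("...terms data...");  // per-field terms would stream here
          long dirStart = out.getFilePointer();
          out.writeInt(1);                   // field count, then per-field entries
          out.seek(dirPointerPos);
          out.writeLong(dirStart);           // patch the reserved slot
        } finally {
          out.close();
        }
      }
    }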

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsIndexReader.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsIndexReader.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsIndexReader.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,71 @@
+package org.apache.lucene.index.codecs.standard;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.TermRef;
+
+import java.io.IOException;
+import java.util.Collection;
+
+
+// TODO
+//   - allow for non-regular index intervals?  eg with a
+//     long string of rare terms, you don't need such
+//     frequent indexing
+
+/**
+ * TermsDictReader interacts with an instance of this class
+ * to manage its terms index.  The writer must accept
+ * indexed terms (many pairs of CharSequence text + long
+ * fileOffset); this reader must then be able to
+ * retrieve the largest index term that's <= a provided
+ * term text. */
+
+public abstract class StandardTermsIndexReader {
+
+  static class TermsIndexResult {
+    int position;
+    final TermRef term = new TermRef();
+    long offset;
+  }
+
+  public abstract class FieldReader {
+    /** Returns position of "largest" index term that's <=
+     *  text.  Returned TermsIndexResult may be reused
+     *  across calls.  This resets internal state, and
+     *  expects that you'll then scan the file and
+     *  sequentially call isIndexTerm for each term
+     *  encountered. */
+    public abstract void getIndexOffset(TermRef term, TermsIndexResult result) throws IOException;
+
+    public abstract void getIndexOffset(long ord, TermsIndexResult result) throws IOException;
+
+    /** Call this sequentially for each term encountered,
+     *  after calling {@link #getIndexOffset}. */
+    public abstract boolean isIndexTerm(int position, int docFreq) throws IOException;
+  }
+
+  public abstract FieldReader getField(FieldInfo fieldInfo);
+
+  public abstract void loadTermsIndex() throws IOException;
+
+  public abstract void close() throws IOException;
+
+  public abstract void getExtensions(Collection extensions);
+}
\ No newline at end of file

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsIndexReader.java
------------------------------------------------------------------------------
    svn:eol-style = native
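
The contract above splits term lookup into two phases: getIndexOffset binary-searches the sparse in-memory index for the largest indexed term <= the target, and the caller then scans the terms file forward from that offset. A tiny in-memory analogue over a sorted term list, indexing every INTERVAL'th entry; all names and the interval value are illustrative:

    public class SeekScanDemo {
      static final int INTERVAL = 4;

      // Returns the position of the first term >= target, or -1 at end.
      static int seek(String[] terms, String target) {
        // Binary-search the sparse index (positions 0, INTERVAL, 2*INTERVAL, ...)
        // for the largest indexed term that's <= target:
        int lo = 0, hi = (terms.length - 1) / INTERVAL;
        while (lo < hi) {
          int mid = (lo + hi + 1) >>> 1;
          if (terms[mid * INTERVAL].compareTo(target) <= 0) {
            lo = mid;
          } else {
            hi = mid - 1;
          }
        }
        // Then scan forward, as the terms dict does with next():
        for (int pos = lo * INTERVAL; pos < terms.length; pos++) {
          int cmp = terms[pos].compareTo(target);
          if (cmp >= 0) {
            return pos;  // FOUND if cmp == 0, otherwise NOT_FOUND at this term
          }
        }
        return -1;  // END: exhausted the field's terms
      }

      public static void main(String[] args) {
        String[] terms = {"ant", "bee", "cat", "dog", "eel", "fox", "gnu", "hen", "owl"};
        System.out.println(seek(terms, "dog"));  // 3 (exact match)
        System.out.println(seek(terms, "dot"));  // 4 (lands on "eel")
        System.out.println(seek(terms, "zzz"));  // -1 (end)
      }
    }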

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsIndexWriter.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsIndexWriter.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsIndexWriter.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,35 @@
+package org.apache.lucene.index.codecs.standard;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.index.FieldInfo;
+import java.io.IOException;
+
+public abstract class StandardTermsIndexWriter {
+
+  public abstract void setTermsOutput(IndexOutput out);
+
+  public abstract class FieldWriter {
+    public abstract boolean checkIndexTerm(byte[] bytes, int length, int docFreq) throws IOException;
+  }
+
+  public abstract FieldWriter addField(FieldInfo fieldInfo);
+
+  public abstract void close() throws IOException;
+}
\ No newline at end of file

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsIndexWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native
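
A FieldWriter implementation decides, term by term, which terms get an index entry. A minimal sketch that simply marks every interval'th term as indexed; this mirrors the classic termIndexInterval behavior but is an assumption here, not necessarily what SimpleStandardTermsIndexWriter does, and it does not extend the abstract class so the snippet compiles standalone:

    import java.io.IOException;

    // Sketch only: mirrors the shape of StandardTermsIndexWriter.FieldWriter.
    public class FixedIntervalFieldWriter {
      private final int interval;
      private long termCount;

      public FixedIntervalFieldWriter(int interval) {
        this.interval = interval;
      }

      public boolean checkIndexTerm(byte[] bytes, int length, int docFreq) throws IOException {
        // Terms 0, interval, 2*interval, ... become index terms; a real
        // implementation would also record each index term's bytes and its
        // offset in the terms file.
        return termCount++ % interval == 0;
      }

      public static void main(String[] args) throws IOException {
        FixedIntervalFieldWriter w = new FixedIntervalFieldWriter(128);
        System.out.println(w.checkIndexTerm(new byte[0], 0, 1));  // true: term 0 is indexed
        System.out.println(w.checkIndexTerm(new byte[0], 0, 1));  // false
      }
    }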

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/BooleanScorer2.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/BooleanScorer2.java?rev=824918&r1=824917&r2=824918&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/BooleanScorer2.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/BooleanScorer2.java Tue Oct 13 20:44:51 2009
@@ -292,6 +292,7 @@
   public float score() throws IOException {
     coordinator.nrMatchers = 0;
     float sum = countingSumScorer.score();
+    assert coordinator.nrMatchers >= 0;
     return sum * coordinator.coordFactors[coordinator.nrMatchers];
   }
 

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/ExactPhraseScorer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/ExactPhraseScorer.java?rev=824918&r1=824917&r2=824918&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/ExactPhraseScorer.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/ExactPhraseScorer.java Tue Oct 13 20:44:51 2009
@@ -22,9 +22,9 @@
 
 final class ExactPhraseScorer extends PhraseScorer {
 
-  ExactPhraseScorer(Weight weight, TermPositions[] tps, int[] offsets,
+  ExactPhraseScorer(Weight weight, DocsEnum[] docs, int[] offsets,
       Similarity similarity, byte[] norms) {
-    super(weight, tps, offsets, similarity, norms);
+    super(weight, docs, offsets, similarity, norms);
   }
 
   protected final float phraseFreq() throws IOException {

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/FieldCache.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/FieldCache.java?rev=824918&r1=824917&r2=824918&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/FieldCache.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/FieldCache.java Tue Oct 13 20:44:51 2009
@@ -20,6 +20,7 @@
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.index.TermRef;
 import org.apache.lucene.document.NumericField; // for javadocs
 import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
 
@@ -100,7 +101,7 @@
    */
   public interface ByteParser extends Parser {
     /** Return a single Byte representation of this field's value. */
-    public byte parseByte(String string);
+    public byte parseByte(TermRef term);
   }
 
   /** Interface to parse shorts from document fields.
@@ -108,7 +109,7 @@
    */
   public interface ShortParser extends Parser {
     /** Return a short representation of this field's value. */
-    public short parseShort(String string);
+    public short parseShort(TermRef term);
   }
 
   /** Interface to parse ints from document fields.
@@ -116,7 +117,7 @@
    */
   public interface IntParser extends Parser {
     /** Return an integer representation of this field's value. */
-    public int parseInt(String string);
+    public int parseInt(TermRef term);
   }
 
   /** Interface to parse floats from document fields.
@@ -124,7 +125,7 @@
    */
   public interface FloatParser extends Parser {
     /** Return an float representation of this field's value. */
-    public float parseFloat(String string);
+    public float parseFloat(TermRef term);
   }
 
   /** Interface to parse long from document fields.
@@ -132,7 +133,7 @@
    */
   public interface LongParser extends Parser {
     /** Return an long representation of this field's value. */
-    public long parseLong(String string);
+    public long parseLong(TermRef term);
   }
 
   /** Interface to parse doubles from document fields.
@@ -140,16 +141,21 @@
    */
   public interface DoubleParser extends Parser {
     /** Return an long representation of this field's value. */
-    public double parseDouble(String string);
+    public double parseDouble(TermRef term);
   }
 
   /** Expert: The cache used internally by sorting and range query classes. */
   public static FieldCache DEFAULT = new FieldCacheImpl();
-  
+
   /** The default parser for byte values, which are encoded by {@link Byte#toString(byte)} */
   public static final ByteParser DEFAULT_BYTE_PARSER = new ByteParser() {
-    public byte parseByte(String value) {
-      return Byte.parseByte(value);
+    public byte parseByte(TermRef term) {
+      final long num = FieldCacheImpl.parseLong(term);
+      if (num >= Byte.MIN_VALUE && num <= Byte.MAX_VALUE) {
+        return (byte) num;
+      } else {
+        throw new IllegalArgumentException("value \"" + term + "\" is out of bounds for Byte");
+      }
     }
     protected Object readResolve() {
       return DEFAULT_BYTE_PARSER;
@@ -161,8 +167,13 @@
 
   /** The default parser for short values, which are encoded by {@link Short#toString(short)} */
   public static final ShortParser DEFAULT_SHORT_PARSER = new ShortParser() {
-    public short parseShort(String value) {
-      return Short.parseShort(value);
+    public short parseShort(TermRef term) {
+      final long num = FieldCacheImpl.parseLong(term);
+      if (num >= Short.MIN_VALUE && num <= Short.MAX_VALUE) {
+        return (short) num;
+      } else {
+        throw new IllegalArgumentException("value \"" + term + "\" is out of bounds for Short");
+      }
     }
     protected Object readResolve() {
       return DEFAULT_SHORT_PARSER;
@@ -174,8 +185,13 @@
 
   /** The default parser for int values, which are encoded by {@link Integer#toString(int)} */
   public static final IntParser DEFAULT_INT_PARSER = new IntParser() {
-    public int parseInt(String value) {
-      return Integer.parseInt(value);
+    public int parseInt(TermRef term) {
+      final long num = FieldCacheImpl.parseLong(term);
+      if (num >= Integer.MIN_VALUE && num <= Integer.MAX_VALUE) {
+        return (int) num;
+      } else {
+        throw new IllegalArgumentException("value \"" + term + "\" is out of bounds for Integer");
+      }
     }
     protected Object readResolve() {
       return DEFAULT_INT_PARSER;
@@ -187,8 +203,10 @@
 
   /** The default parser for float values, which are encoded by {@link Float#toString(float)} */
   public static final FloatParser DEFAULT_FLOAT_PARSER = new FloatParser() {
-    public float parseFloat(String value) {
-      return Float.parseFloat(value);
+    public float parseFloat(TermRef term) {
+      // TODO: would be far better to directly parse
+      // the UTF-8 bytes into float, but that's tricky?
+      return Float.parseFloat(term.toString());
     }
     protected Object readResolve() {
       return DEFAULT_FLOAT_PARSER;
@@ -200,8 +218,8 @@
 
   /** The default parser for long values, which are encoded by {@link Long#toString(long)} */
   public static final LongParser DEFAULT_LONG_PARSER = new LongParser() {
-    public long parseLong(String value) {
-      return Long.parseLong(value);
+    public long parseLong(TermRef term) {
+      return FieldCacheImpl.parseLong(term);
     }
     protected Object readResolve() {
       return DEFAULT_LONG_PARSER;
@@ -213,8 +231,10 @@
 
   /** The default parser for double values, which are encoded by {@link Double#toString(double)} */
   public static final DoubleParser DEFAULT_DOUBLE_PARSER = new DoubleParser() {
-    public double parseDouble(String value) {
-      return Double.parseDouble(value);
+    public double parseDouble(TermRef term) {
+      // TODO: would be far better to directly parse
+      // the UTF-8 bytes into double, but that's tricky?
+      return Double.parseDouble(term.toString());
     }
     protected Object readResolve() {
       return DEFAULT_DOUBLE_PARSER;
@@ -229,8 +249,8 @@
    * via {@link NumericField}/{@link NumericTokenStream}.
    */
   public static final IntParser NUMERIC_UTILS_INT_PARSER=new IntParser(){
-    public int parseInt(String val) {
-      final int shift = val.charAt(0)-NumericUtils.SHIFT_START_INT;
+    public int parseInt(TermRef val) {
+      final int shift = val.bytes[val.offset]-NumericUtils.SHIFT_START_INT;
       if (shift>0 && shift<=31)
         throw new FieldCacheImpl.StopFillCacheException();
       return NumericUtils.prefixCodedToInt(val);
@@ -248,11 +268,11 @@
    * via {@link NumericField}/{@link NumericTokenStream}.
    */
   public static final FloatParser NUMERIC_UTILS_FLOAT_PARSER=new FloatParser(){
-    public float parseFloat(String val) {
-      final int shift = val.charAt(0)-NumericUtils.SHIFT_START_INT;
+    public float parseFloat(TermRef term) {
+      final int shift = term.bytes[term.offset]-NumericUtils.SHIFT_START_INT;
       if (shift>0 && shift<=31)
         throw new FieldCacheImpl.StopFillCacheException();
-      return NumericUtils.sortableIntToFloat(NumericUtils.prefixCodedToInt(val));
+      return NumericUtils.sortableIntToFloat(NumericUtils.prefixCodedToInt(term));
     }
     protected Object readResolve() {
       return NUMERIC_UTILS_FLOAT_PARSER;
@@ -267,11 +287,11 @@
    * via {@link NumericField}/{@link NumericTokenStream}.
    */
   public static final LongParser NUMERIC_UTILS_LONG_PARSER = new LongParser(){
-    public long parseLong(String val) {
-      final int shift = val.charAt(0)-NumericUtils.SHIFT_START_LONG;
+    public long parseLong(TermRef term) {
+      final int shift = term.bytes[term.offset]-NumericUtils.SHIFT_START_LONG;
       if (shift>0 && shift<=63)
         throw new FieldCacheImpl.StopFillCacheException();
-      return NumericUtils.prefixCodedToLong(val);
+      return NumericUtils.prefixCodedToLong(term);
     }
     protected Object readResolve() {
       return NUMERIC_UTILS_LONG_PARSER;
@@ -286,11 +306,11 @@
    * via {@link NumericField}/{@link NumericTokenStream}.
    */
   public static final DoubleParser NUMERIC_UTILS_DOUBLE_PARSER = new DoubleParser(){
-    public double parseDouble(String val) {
-      final int shift = val.charAt(0)-NumericUtils.SHIFT_START_LONG;
+    public double parseDouble(TermRef term) {
+      final int shift = term.bytes[term.offset]-NumericUtils.SHIFT_START_LONG;
       if (shift>0 && shift<=63)
         throw new FieldCacheImpl.StopFillCacheException();
-      return NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(val));
+      return NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(term));
     }
     protected Object readResolve() {
       return NUMERIC_UTILS_DOUBLE_PARSER;
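
All three narrowing default parsers above (byte, short, int) now share one shape: parse the term's bytes as a long, bounds-check, then cast. A standalone sketch of that shape; parseAsciiLong below is a stand-in for FieldCacheImpl.parseLong, whose exact behavior isn't shown in this commit, and it skips overflow handling to stay short:

    public class NarrowingParseDemo {
      // Stand-in for FieldCacheImpl.parseLong: decode ASCII digits straight
      // from the term's UTF-8 bytes.
      static long parseAsciiLong(byte[] bytes, int offset, int length) {
        boolean negative = bytes[offset] == '-';
        long num = 0;
        for (int i = offset + (negative ? 1 : 0); i < offset + length; i++) {
          num = num * 10 + (bytes[i] - '0');
        }
        return negative ? -num : num;
      }

      // Same shape as the new DEFAULT_BYTE_PARSER: parse wide, check, narrow.
      static byte parseByte(byte[] utf8, int offset, int length) {
        final long num = parseAsciiLong(utf8, offset, length);
        if (num >= Byte.MIN_VALUE && num <= Byte.MAX_VALUE) {
          return (byte) num;
        }
        throw new IllegalArgumentException("out of bounds for Byte: " + num);
      }

      public static void main(String[] args) {
        byte[] term = "-42".getBytes();
        System.out.println(parseByte(term, 0, term.length));  // -42
      }
    }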


