lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r824918 [7/11] - in /lucene/java/branches/flex_1458: contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/ contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/ contrib/benchmark/src/test/org/apache/lucene/benc...
Date Tue, 13 Oct 2009 20:44:59 GMT
Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepPositionsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepPositionsReader.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepPositionsReader.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepPositionsReader.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,308 @@
+package org.apache.lucene.index.codecs.sep;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.PositionsEnum;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.PositionsProducer;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+
+public class SepPositionsReader extends PositionsProducer {
+  
+  final IntIndexInput posIn;
+
+  final IndexInput payloadIn;
+
+  IndexInput termsIn;
+
+  public SepPositionsReader(Directory dir, SegmentInfo segmentInfo, int readBufferSize, IntStreamFactory intFactory) throws IOException {
+    assert segmentInfo.getHasProx();
+    boolean success = false;
+    try {
+      posIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, SepCodec.POS_EXTENSION), readBufferSize);
+      payloadIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, SepCodec.PAYLOAD_EXTENSION), readBufferSize);
+      success = true;
+    } finally {
+      if (!success) {
+        close();
+      }
+    }
+  }
+
+  public void start(IndexInput termsIn) throws IOException {
+    this.termsIn = termsIn;
+
+    // nocommit -- move these 2 constants into XXXCodec?
+    Codec.checkHeader(termsIn, SepPositionsWriter.CODEC, SepPositionsWriter.VERSION_START);
+  }
+
+  static void files(SegmentInfo segmentInfo, Collection files) {
+    if (segmentInfo.getHasProx()) {
+      files.add(IndexFileNames.segmentFileName(segmentInfo.name, SepCodec.POS_EXTENSION));
+      files.add(IndexFileNames.segmentFileName(segmentInfo.name, SepCodec.PAYLOAD_EXTENSION));
+    }
+  }
+
+  public Reader reader(FieldInfo fieldInfo, IndexInput termsIn) throws IOException {
+    return new TermsDictReader(termsIn, fieldInfo);
+  }
+
+  public void close() throws IOException {
+    try {
+      if (posIn != null)
+        posIn.close();
+    } finally {
+      if (payloadIn != null)
+        payloadIn.close();
+    }
+  }
+
+  class TermsDictReader extends Reader {
+
+    final IndexInput termsIn;
+    final IntIndexInput.Reader posIn;
+    final IntIndexInput.Index posIndex;
+    
+    final FieldInfo fieldInfo;
+    long payloadOffset;
+
+    TermsDictReader(IndexInput termsIn, FieldInfo fieldInfo) throws IOException {
+      this.termsIn = termsIn;
+      this.fieldInfo = fieldInfo;
+      this.posIn = SepPositionsReader.this.posIn.reader();
+      posIndex = SepPositionsReader.this.posIn.index();
+    }
+
+    public IntIndexInput getPosIn() {
+      return SepPositionsReader.this.posIn;
+    }
+
+    public void readTerm(int docFreq, boolean isIndexTerm) throws IOException {
+      if (Codec.DEBUG) {
+        System.out.println("    pr.readterm termsInPointer=" + termsIn.getFilePointer() + " isIndex=" + isIndexTerm);
+      }
+      posIndex.read(termsIn, isIndexTerm);
+      if (isIndexTerm) {
+        payloadOffset = termsIn.readVLong();
+      } else {
+        payloadOffset += termsIn.readVLong();
+      }
+      if (Codec.DEBUG) {
+        System.out.println("      posIndex=" + posIndex + " payloadOffset=" + payloadOffset);
+      }
+      if (positions != null) {
+        positions.seek(posIndex, payloadOffset, -1);
+      }
+    }
+
+    SegmentPositionsEnum positions;
+
+    public PositionsEnum positions() throws IOException {
+
+      if (positions == null) {
+        // Lazy init
+        positions = new SegmentPositionsEnum(posIndex, payloadOffset);
+      }
+
+      return positions;
+    }
+
+    // nocommit -- should we have different reader for
+    // payload vs no payload?
+    class SegmentPositionsEnum extends PositionsEnum {
+
+      // nocommit
+      String desc;
+
+      //final IntIndexInput posIn;
+      final IndexInput payloadIn;
+      final IntIndexInput.Index pendingPosIndex;
+
+      final boolean storePayloads;
+
+      boolean payloadPending;                     // True if we must skip payload before reading next position
+
+      long payloadOffset;
+
+      int position;
+      int payloadLength;
+      int posSkipCount;
+
+      private boolean seekPending;
+
+      SegmentPositionsEnum(IntIndexInput.Index posIndex, long payloadOffset) throws IOException {
+        //posIn = SepPositionsReader.this.posIn.reader();
+        this.payloadOffset = payloadOffset;
+        pendingPosIndex = SepPositionsReader.this.posIn.index();
+        pendingPosIndex.set(posIndex);
+        seekPending = true;
+
+        if (Codec.DEBUG) {
+          System.out.println("new pos enum seekPending=true posIndex=" + pendingPosIndex);
+        }
+        storePayloads = fieldInfo.storePayloads;
+        if (storePayloads) {
+          payloadIn = (IndexInput) SepPositionsReader.this.payloadIn.clone();
+        } else {
+          payloadIn = null;
+        }
+      }
+
+      public void seek(IntIndexInput.Index posIndex, long payloadOffset, int payloadLength) {
+        if (Codec.DEBUG) {
+          System.out.println("spr.seek posIndex=" + posIndex);
+        }
+        pendingPosIndex.set(posIndex);
+        this.payloadOffset = payloadOffset;
+        this.payloadLength = payloadLength;
+        posSkipCount = 0;
+        seekPending = true;
+      }
+
+      // Cumulative on top of a previous Index seek
+      public void seek(int posCount) {
+        posSkipCount += posCount;
+        if (Codec.DEBUG) {
+          System.out.println("pr [" + desc + "] skip " + posCount + " positions; now " + posSkipCount);
+        }
+      }
+
+      void catchUp(int currentCount) throws IOException {
+        if (Codec.DEBUG) {
+          System.out.println("pos catchup [" + desc + "]: seekPending=" + seekPending + " seekPosIndex=" + pendingPosIndex + " payloadPending=" + payloadPending + " payloadFP=" + payloadOffset + " skipPosCount " + posSkipCount + " vs currentCount " + currentCount);
+        }
+
+        if (seekPending) {
+          pendingPosIndex.seek(posIn);
+          if (storePayloads) {
+            payloadIn.seek(payloadOffset);
+          }
+          payloadPending = false;
+          seekPending = false;
+        }
+
+        while(posSkipCount > currentCount) {
+          next();
+        }
+
+        if (Codec.DEBUG) {
+          System.out.println("  pos catchup done");
+        }
+        position = 0;
+      }
+
+      public int next() throws IOException {
+
+        if (Codec.DEBUG) {
+          System.out.println("pr.next [" + desc + "]: posFP=" + posIn.descFilePointer() + getPayloadFP());
+        }
+
+        final int code = posIn.next();
+
+        if (storePayloads) {
+
+          if (payloadPending && payloadLength > 0) {
+            if (Codec.DEBUG) {
+              System.out.println("  payload pending: skip " + payloadLength + " bytes");
+            }
+            // nocommit: do this lazily, when getPayload()
+            // is called
+            payloadIn.seek(payloadIn.getFilePointer()+payloadLength);
+          }
+
+          if ((code & 1) != 0) {
+            // Payload length has changed
+            payloadLength = posIn.next();
+            assert payloadLength >= 0;
+            if (Codec.DEBUG) {
+              System.out.println("  new payloadLen=" + payloadLength);
+            }
+          }
+          assert payloadLength != -1;
+          
+          payloadPending = true;
+          position += code >>> 1;
+        } else {
+          position += code;
+        }
+
+        posSkipCount--;
+
+        // NOTE: the old API actually allowed this... and some tests actually did it
+        assert posSkipCount >= 0: "next() was called too many times (more than FormatPostingsDocsEnum.freq() times)";
+
+        if (Codec.DEBUG) {
+          System.out.println("  proxFP=" + posIn.descFilePointer() + getPayloadFP() + " return pos=" + position);
+        }
+
+        return position;
+      }
+
+      // debugging only
+      private String getPayloadFP() {
+        if (payloadIn != null) {
+          return " payloadFP=" + payloadIn.getFilePointer();
+        } else {
+          return " payloadFP=null";
+        }
+      }
+
+      public int getPayloadLength() {
+        return payloadLength;
+      }
+
+      public byte[] getPayload(byte[] data, int offset) throws IOException {
+
+        if (!payloadPending) {
+          throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
+        }
+
+        if (Codec.DEBUG) {
+          System.out.println("   getPayload payloadFP=" + payloadIn.getFilePointer() + " len=" + payloadLength);
+        }
+
+        final byte[] retArray;
+        final int retOffset;
+        if (data == null || data.length-offset < payloadLength) {
+          // the array is too small to store the payload data,
+          // so we allocate a new one
+          retArray = new byte[payloadLength];
+          retOffset = 0;
+        } else {
+          retArray = data;
+          retOffset = offset;
+        }
+
+        payloadIn.readBytes(retArray, retOffset, payloadLength);
+        payloadPending = false;
+        return retArray;
+      }
+      
+      public boolean hasPayload() {
+        return payloadPending && payloadLength > 0;
+      }
+    }
+  }
+}
\ No newline at end of file

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepPositionsReader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepPositionsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepPositionsWriter.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepPositionsWriter.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepPositionsWriter.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,195 @@
+package org.apache.lucene.index.codecs.sep;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.codecs.PositionsConsumer;
+import org.apache.lucene.index.codecs.Codec;
+
+public final class SepPositionsWriter extends PositionsConsumer {
+
+  final static String CODEC = "SepPositionsPayloads";
+
+  // Increment version to change it:
+  final static int VERSION_START = 0;
+  final static int VERSION_CURRENT = VERSION_START;
+
+  final SepDocsWriter parent;
+  final IntIndexOutput posOut;
+  final IntIndexOutput.Index posIndex;
+  final IndexOutput payloadOut;
+
+  IndexOutput termsOut;
+
+  boolean omitTF;
+  boolean storePayloads;
+  int lastPayloadLength = -1;
+
+  // nocommit
+  String desc;
+
+  public SepPositionsWriter(SegmentWriteState state, SepDocsWriter parent, IntStreamFactory factory) throws IOException {
+    this.parent = parent;
+    omitTF = parent.omitTF;
+    if (Codec.DEBUG) {
+      System.out.println("spw.create seg=" + state.segmentName + " dir=" + state.directory);
+    }
+    if (state.fieldInfos.hasProx()) {
+      // At least one field does not omit TF, so create the
+
+      // prox file
+      final String proxFileName = IndexFileNames.segmentFileName(state.segmentName, SepCodec.POS_EXTENSION);
+      posOut = factory.createOutput(state.directory, proxFileName);
+      state.flushedFiles.add(proxFileName);
+      posIndex = posOut.index();
+
+      // nocommit -- only if at least one field stores
+      // payloads?
+      boolean success = false;
+      final String payloadFileName = IndexFileNames.segmentFileName(state.segmentName, SepCodec.PAYLOAD_EXTENSION);
+      try {
+        payloadOut = state.directory.createOutput(payloadFileName);
+        success = true;
+      } finally {
+        if (!success) {
+          posOut.close();
+        }
+      }
+      state.flushedFiles.add(payloadFileName);
+
+      if (Codec.DEBUG) {
+        System.out.println("  hasProx create pos=" + proxFileName + " payload=" + payloadFileName);
+      }
+
+      parent.skipListWriter.setPosOutput(posOut);
+      parent.skipListWriter.setPayloadOutput(payloadOut);
+    } else {
+      if (Codec.DEBUG) {
+        System.out.println("  no prox");
+      }
+      // Every field omits TF so we will write no prox file
+      posIndex = null;
+      posOut = null;
+      payloadOut = null;
+    }
+  }
+
+  public void start(IndexOutput termsOut) throws IOException {
+    this.termsOut = termsOut;
+    Codec.writeHeader(termsOut, CODEC, VERSION_CURRENT);
+  }
+
+  long payloadStart;
+  long lastPayloadStart;
+
+  public void startTerm() throws IOException {
+    posIndex.mark();
+    payloadStart = payloadOut.getFilePointer();
+    lastPayloadLength = -1;
+  }
+
+  int lastPosition;
+
+  /** Add a new position & payload */
+  public void addPosition(int position, byte[] payload, int payloadOffset, int payloadLength) throws IOException {
+    assert !omitTF: "omitTF is true";
+    assert posOut != null;
+    if (Codec.DEBUG) {
+      if (payload != null) {
+        System.out.println("pw.addPos [" + desc + "]: pos=" + position + " posFP=" + posOut.descFilePointer() + " payloadFP=" + payloadOut.getFilePointer() + " payload=" + payloadLength + " bytes");
+      } else {
+        System.out.println("pw.addPos [" + desc + "]: pos=" + position + " posFP=" + posOut.descFilePointer() + " payloadFP=" + payloadOut.getFilePointer());
+      }
+    }
+
+    final int delta = position - lastPosition;
+    lastPosition = position;
+
+    if (storePayloads) {
+      if (Codec.DEBUG) {
+        System.out.println("  store payload len=" + payloadLength);
+      }
+      if (payloadLength != lastPayloadLength) {
+        if (Codec.DEBUG) {
+          System.out.println("  payload len change old=" + lastPayloadLength + " new=" + payloadLength);
+        }
+        lastPayloadLength = payloadLength;
+        // TODO: explore whether we get better compression
+        // by not storing payloadLength into prox stream?
+        posOut.write((delta<<1)|1);
+        posOut.write(payloadLength);
+      } else {
+        posOut.write(delta << 1);
+      }
+
+      if (payloadLength > 0) {
+        if (Codec.DEBUG) {
+          System.out.println("  write @ payloadFP=" + payloadOut.getFilePointer());
+        }
+        payloadOut.writeBytes(payload, payloadLength);
+      }
+    } else {
+      posOut.write(delta);
+    }
+  }
+
+  void setField(FieldInfo fieldInfo) {
+    omitTF = fieldInfo.omitTermFreqAndPositions;
+    storePayloads = omitTF ? false : fieldInfo.storePayloads;
+  }
+
+  /** Called when we are done adding positions & payloads */
+  public void finishDoc() {       
+    lastPosition = 0;
+  }
+
+  public void finishTerm(boolean isIndexTerm) throws IOException {
+    assert !omitTF;
+
+    if (Codec.DEBUG) {
+      System.out.println("poswriter finishTerm isIndex=" + isIndexTerm + " pointer=" + termsOut.getFilePointer());
+    }
+
+    posIndex.write(termsOut, isIndexTerm);
+    if (isIndexTerm) {
+      // Write absolute at seek points
+      termsOut.writeVLong(payloadStart);
+    } else {
+      termsOut.writeVLong(payloadStart-lastPayloadStart);
+    }
+
+    lastPayloadStart = payloadStart;
+  }
+
+  public void close() throws IOException {
+    try {
+      if (posOut != null) {
+        posOut.close();
+      }
+    } finally {
+      if (payloadOut != null) {
+        payloadOut.close();
+      }
+    }
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepPositionsWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,231 @@
+package org.apache.lucene.index.codecs.sep;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.MultiLevelSkipListReader;
+
+/**
+ * Implements the skip list reader for the default posting list format
+ * that stores positions and payloads.
+ */
+
+// TODO: rewrite this as recursive classes?
+class SepSkipListReader extends MultiLevelSkipListReader {
+  private boolean currentFieldStoresPayloads;
+  private IntIndexInput.Index freqIndex[];
+  private IntIndexInput.Index docIndex[];
+  private IntIndexInput.Index posIndex[];
+  private long payloadPointer[];
+  private int payloadLength[];
+
+  private final IntIndexInput.Index lastFreqIndex;
+  private final IntIndexInput.Index lastDocIndex;
+  // nocommit -- make private again
+  final IntIndexInput.Index lastPosIndex;
+  
+  private long lastFreqPointer;
+  private long lastDocPointer;
+  private long lastPosPointer;
+  private long lastPayloadPointer;
+  private int lastPayloadLength;
+  private int lastChildLevel;
+                           
+  SepSkipListReader(IndexInput skipStream,
+                    IntIndexInput freqIn,
+                    IntIndexInput docIn,
+                    IntIndexInput posIn,
+                    int maxSkipLevels,
+                    int skipInterval)
+    throws IOException {
+    super(skipStream, maxSkipLevels, skipInterval);
+    if (freqIn != null) {
+      freqIndex = new IntIndexInput.Index[maxSkipLevels];
+    }
+    docIndex = new IntIndexInput.Index[maxSkipLevels];
+    if (posIn != null) {
+      posIndex = new IntIndexInput.Index[maxNumberOfSkipLevels];
+    }
+    for(int i=0;i<maxSkipLevels;i++) {
+      if (freqIn != null) {
+        freqIndex[i] = freqIn.index();
+        if (Codec.DEBUG) {
+          freqIndex[i].desc = "sslr.freq.level" + i;
+        }
+      }
+      docIndex[i] = docIn.index();
+      if (Codec.DEBUG) {
+        docIndex[i].desc = "sslr.doc.level" + i;
+      }
+      if (posIn != null) {
+        posIndex[i] = posIn.index();
+        if (Codec.DEBUG) {
+          posIndex[i].desc = "sslr.pos.level" + i;
+        }
+      }
+    }
+    payloadPointer = new long[maxSkipLevels];
+    payloadLength = new int[maxSkipLevels];
+
+    if (freqIn != null) {
+      lastFreqIndex = freqIn.index();
+    } else {
+      lastFreqIndex = null;
+    }
+    lastDocIndex = docIn.index();
+    if (posIn != null) {
+      lastPosIndex = posIn.index();
+    } else {
+      lastPosIndex = null;
+    }
+  }
+  
+  void init(long skipPointer,
+            IntIndexInput.Index docBaseIndex,
+            IntIndexInput.Index freqBaseIndex,
+            IntIndexInput.Index posBaseIndex,
+            long payloadBasePointer,
+            int df,
+            boolean storesPayloads) {
+
+    super.init(skipPointer, df);
+    this.currentFieldStoresPayloads = storesPayloads;
+
+    if (Codec.DEBUG) {
+      System.out.println("ssr.init docBase=" + docBaseIndex + " freqBase=" + freqBaseIndex + " posBase=" + posBaseIndex + " payloadBase=" + payloadBasePointer + " df=" + df);
+    }
+
+    /*
+    lastFreqPointer = freqBasePointer;
+    lastDocPointer = docBasePointer;
+    lastPosPointer = posBasePointer;
+    */
+
+    lastPayloadPointer = payloadBasePointer;
+
+    for(int i=0;i<maxNumberOfSkipLevels;i++) {
+      docIndex[i].set(docBaseIndex);
+      if (freqIndex != null) {
+        freqIndex[i].set(freqBaseIndex);
+      }
+      if (posBaseIndex != null) {
+        posIndex[i].set(posBaseIndex);
+      }
+    }
+    Arrays.fill(payloadPointer, payloadBasePointer);
+    Arrays.fill(payloadLength, 0);
+  }
+
+  long getPayloadPointer() {
+    return lastPayloadPointer;
+  }
+  
+  /** Returns the payload length of the payload stored just before 
+   * the doc to which the last call of {@link MultiLevelSkipListReader#skipTo(int)} 
+   * has skipped.  */
+  int getPayloadLength() {
+    return lastPayloadLength;
+  }
+  
+  protected void seekChild(int level) throws IOException {
+    super.seekChild(level);
+    //freqPointer[level] = lastFreqPointer;
+    //docPointer[level] = lastDocPointer;
+    //posPointer[level] = lastPosPointer;
+    payloadPointer[level] = lastPayloadPointer;
+    payloadLength[level] = lastPayloadLength;
+  }
+  
+  protected void setLastSkipData(int level) {
+    super.setLastSkipData(level);
+
+    lastPayloadPointer = payloadPointer[level];
+    lastPayloadLength = payloadLength[level];
+    if (freqIndex != null) {
+      lastFreqIndex.set(freqIndex[level]);
+    }
+    lastDocIndex.set(docIndex[level]);
+    if (lastPosIndex != null) {
+      lastPosIndex.set(posIndex[level]);
+    }
+
+    if (level > 0) {
+      //lastFreqPointer = freqPointer[level];
+      //lastDocPointer = docPointer[level];
+      //lastPosPointer = posPointer[level];
+      if (freqIndex != null) {
+        freqIndex[level-1].set(freqIndex[level]);
+      }
+      docIndex[level-1].set(docIndex[level]);
+      if (posIndex != null) {
+        posIndex[level-1].set(posIndex[level]);
+      }
+      lastChildLevel = level-1;
+    }
+  }
+
+  IntIndexInput.Index getFreqIndex() {
+    return lastFreqIndex;
+  }
+
+  IntIndexInput.Index getPosIndex() {
+    return lastPosIndex;
+  }
+
+  IntIndexInput.Index getDocIndex() {
+    return lastDocIndex;
+  }
+
+  protected int readSkipData(int level, IndexInput skipStream) throws IOException {
+    int delta;
+    if (currentFieldStoresPayloads) {
+      // the current field stores payloads.
+      // if the doc delta is odd then we have
+      // to read the current payload length
+      // because it differs from the length of the
+      // previous payload
+      delta = skipStream.readVInt();
+      if ((delta & 1) != 0) {
+        payloadLength[level] = skipStream.readVInt();
+      }
+      delta >>>= 1;
+    } else {
+      delta = skipStream.readVInt();
+    }
+    //System.out.println("  delta=" + delta + " level=" +
+    //level);
+    if (freqIndex != null) {
+      freqIndex[level].read(skipStream, false);
+    }
+    docIndex[level].read(skipStream, false);
+    // nocommit -- make this explicit w/ omitTF, matching SepSkipListWriter
+    if (posIndex != null) {
+      posIndex[level].read(skipStream, false);
+      payloadPointer[level] += skipStream.readVInt();
+    }
+    
+    if (Codec.DEBUG) {
+      System.out.println("ssr.readSkipData docDelta=" + delta + " curStoresPayloads=" + currentFieldStoresPayloads + " level=" + level + " freqIndex=" + (freqIndex==null?null:freqIndex[level]) + " docIndex=" + docIndex[level] + " posIndex=" + (posIndex==null? "null" : ""+posIndex[level]) + " payloadPointer=" + payloadPointer[level]);
+    }
+    return delta;
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepSkipListWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepSkipListWriter.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepSkipListWriter.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepSkipListWriter.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,213 @@
+package org.apache.lucene.index.codecs.sep;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.MultiLevelSkipListWriter;
+
+// nocommit -- skip data should somehow be more local to the
+// particular stream (doc, freq, pos, payload)
+
+/**
+ * Implements the skip list writer for the default posting list format
+ * that stores positions and payloads.
+ */
+class SepSkipListWriter extends MultiLevelSkipListWriter {
+  private int[] lastSkipDoc;
+  private int[] lastSkipPayloadLength;
+  private long[] lastSkipPayloadPointer;
+
+  private IntIndexOutput.Index[] docIndex;
+  private IntIndexOutput.Index[] freqIndex;
+  private IntIndexOutput.Index[] posIndex;
+  
+  private IntIndexOutput freqOutput;
+  private IntIndexOutput docOutput;
+  // nocommit -- private again
+  IntIndexOutput posOutput;
+  // nocommit -- private again
+  IndexOutput payloadOutput;
+
+  private int curDoc;
+  private boolean curStorePayloads;
+  private int curPayloadLength;
+  private long curPayloadPointer;
+  
+  SepSkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount,
+                    IntIndexOutput freqOutput,
+                    IntIndexOutput docOutput,
+                    IntIndexOutput posOutput,
+                    IndexOutput payloadOutput)
+    throws IOException {
+    super(skipInterval, numberOfSkipLevels, docCount);
+
+    this.freqOutput = freqOutput;
+    this.docOutput = docOutput;
+    this.posOutput = posOutput;
+    this.payloadOutput = payloadOutput;
+    
+    lastSkipDoc = new int[numberOfSkipLevels];
+    lastSkipPayloadLength = new int[numberOfSkipLevels];
+    // nocommit -- also cutover normal IndexOutput to use getIndex()?
+    lastSkipPayloadPointer = new long[numberOfSkipLevels];
+
+    freqIndex = new IntIndexOutput.Index[numberOfSkipLevels];
+    docIndex = new IntIndexOutput.Index[numberOfSkipLevels];
+    posIndex = new IntIndexOutput.Index[numberOfSkipLevels];
+
+    for(int i=0;i<numberOfSkipLevels;i++) {
+      freqIndex[i] = freqOutput.index();
+      if (Codec.DEBUG) {
+        freqIndex[i].desc = "sslw.freq.level" + i;
+      }
+      docIndex[i] = docOutput.index();
+      if (Codec.DEBUG) {
+        docIndex[i].desc = "sslw.doc.level" + i;
+      }
+      if (posOutput != null) {
+        posIndex[i] = posOutput.index();
+        if (Codec.DEBUG) {
+          posIndex[i].desc = "sslw.pos.level" + i;
+        }
+      }
+    }
+  }
+
+  boolean omitTF;
+
+  void setOmitTF(boolean v) {
+    omitTF = v;
+  }
+
+  void setPosOutput(IntIndexOutput posOutput) throws IOException {
+    this.posOutput = posOutput;
+    for(int i=0;i<numberOfSkipLevels;i++) {
+      posIndex[i] = posOutput.index();
+      if (Codec.DEBUG) {
+        posIndex[i].desc = "sslw.pos.level" + i;
+      }
+    }
+  }
+
+  void setPayloadOutput(IndexOutput payloadOutput) {
+    this.payloadOutput = payloadOutput;
+  }
+
+  /**
+   * Sets the values for the current skip data. 
+   */
+  // Called @ every index interval (every 128th (by default)
+  // doc)
+  void setSkipData(int doc, boolean storePayloads, int payloadLength) {
+    this.curDoc = doc;
+    this.curStorePayloads = storePayloads;
+    this.curPayloadLength = payloadLength;
+    if (payloadOutput != null) {
+      this.curPayloadPointer = payloadOutput.getFilePointer();
+    }
+  }
+
+  // Called @ start of new term
+  protected void resetSkip(IntIndexOutput.Index topDocIndex, IntIndexOutput.Index topFreqIndex, IntIndexOutput.Index topPosIndex)
+    throws IOException {
+    super.resetSkip();
+    if (Codec.DEBUG) {
+      System.out.println("sslw.reset docIndexBase=" + topDocIndex +
+                         " freqIndexBase=" + topFreqIndex +
+                         " topPosIndex=" + (topPosIndex == null? "null" : (""+topPosIndex)));
+    }
+
+    Arrays.fill(lastSkipDoc, 0);
+    Arrays.fill(lastSkipPayloadLength, -1);  // we don't have to write the first length in the skip list
+    // Re-base every level's index at the term's starting positions so the
+    // first skip entry on each level encodes a delta from here.
+    for(int i=0;i<numberOfSkipLevels;i++) {
+      docIndex[i].set(topDocIndex);
+      freqIndex[i].set(topFreqIndex);
+      if (posOutput != null) {
+        posIndex[i].set(topPosIndex);
+      }
+    }
+    if (payloadOutput != null) {
+      Arrays.fill(lastSkipPayloadPointer, payloadOutput.getFilePointer());
+    }
+  }
+  
+  // Writes one skip datum for the given level: doc delta (with inlined
+  // payload length when it changed), then freq/doc/pos index deltas and
+  // the payload file-pointer delta.
+  protected void writeSkipData(int level, IndexOutput skipBuffer) throws IOException {
+    // To efficiently store payloads in the posting lists we do not store the length of
+    // every payload. Instead we omit the length for a payload if the previous payload had
+    // the same length.
+    // However, in order to support skipping the payload length at every skip point must be known.
+    // So we use the same length encoding that we use for the posting lists for the skip data as well:
+    // Case 1: current field does not store payloads
+    //           SkipDatum                 --> DocSkip, FreqSkip, ProxSkip
+    //           DocSkip,FreqSkip,ProxSkip --> VInt
+    //           DocSkip records the document number before every SkipInterval th  document in TermFreqs. 
+    //           Document numbers are represented as differences from the previous value in the sequence.
+    // Case 2: current field stores payloads
+    //           SkipDatum                 --> DocSkip, PayloadLength?, FreqSkip,ProxSkip
+    //           DocSkip,FreqSkip,ProxSkip --> VInt
+    //           PayloadLength             --> VInt    
+    //         In this case DocSkip/2 is the difference between
+    //         the current and the previous value. If DocSkip
+    //         is odd, then a PayloadLength encoded as VInt follows,
+    //         if DocSkip is even, then it is assumed that the
+    //         current payload length equals the length at the previous
+    //         skip point
+    if (Codec.DEBUG) {
+      // posOutput may be null (the constructor only fills posIndex when
+      // posOutput != null), so guard it here or a DEBUG run would NPE.
+      System.out.println("ssw level=" + level + " curDoc=" + curDoc + " lastDoc=" + lastSkipDoc[level] + " delta=" + (curDoc - lastSkipDoc[level]) + " storePayloads=" + curStorePayloads + " skipBufferFP=" + skipBuffer.getFilePointer() + " curPayloadLen=" + curPayloadLength + " freqIndex=" + freqOutput.descFilePointer() + " docIndex=" + docOutput.descFilePointer() + " posIndex=" + (posOutput == null ? "null" : posOutput.descFilePointer()) + " curPayloadPointer=" + curPayloadPointer);
+    }
+
+    // Payloads imply positions, so omitTF fields can never store payloads.
+    assert !omitTF || !curStorePayloads;
+
+    if (curStorePayloads) {
+      int delta = curDoc - lastSkipDoc[level];
+      if (curPayloadLength == lastSkipPayloadLength[level]) {
+        // the current payload length equals the length at the previous skip point,
+        // so we don't store the length again
+        skipBuffer.writeVInt(delta << 1);
+      } else {
+        // the payload length is different from the previous one. We shift the DocSkip, 
+        // set the lowest bit and store the current payload length as VInt.
+        skipBuffer.writeVInt(delta << 1 | 1);
+        skipBuffer.writeVInt(curPayloadLength);
+        lastSkipPayloadLength[level] = curPayloadLength;
+      }
+    } else {
+      // current field does not store payloads
+      skipBuffer.writeVInt(curDoc - lastSkipDoc[level]);
+    }
+
+    if (!omitTF) {
+      freqIndex[level].mark();
+      freqIndex[level].write(skipBuffer, false);
+    }
+    docIndex[level].mark();
+    docIndex[level].write(skipBuffer, false);
+    if (!omitTF) {
+      posIndex[level].mark();
+      posIndex[level].write(skipBuffer, false);
+      skipBuffer.writeVInt((int) (curPayloadPointer - lastSkipPayloadPointer[level]));
+    }
+
+    lastSkipDoc[level] = curDoc;
+    lastSkipPayloadPointer[level] = curPayloadPointer;
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepSkipListWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SingleIntFactory.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SingleIntFactory.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SingleIntFactory.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SingleIntFactory.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,30 @@
+package org.apache.lucene.index.codecs.sep;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.Directory;
+import java.io.IOException;
+
+// Factory producing the "single int" (one vInt per value, no blocking)
+// input/output implementations for the sep codec's int streams.
+public class SingleIntFactory extends IntStreamFactory {
+  public IntIndexInput openInput(Directory dir, String fileName, int readBufferSize) throws IOException {
+    return new SingleIntIndexInput(dir, fileName, readBufferSize);
+  }
+  public IntIndexOutput createOutput(Directory dir, String fileName) throws IOException {
+    return new SingleIntIndexOutput(dir, fileName);
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SingleIntFactory.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexInput.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexInput.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexInput.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,112 @@
+package org.apache.lucene.index.codecs.sep;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.index.codecs.Codec;
+import java.io.IOException;
+
+/** Reads IndexInputs written with {@link
+ * SingleIntIndexOutput}: one vInt per value, no blocking. */
+public class SingleIntIndexInput extends IntIndexInput {
+  private final IndexInput in;
+
+  public SingleIntIndexInput(Directory dir, String fileName, int readBufferSize)
+    throws IOException {
+    in = dir.openInput(fileName, readBufferSize);
+    Codec.checkHeader(in, SingleIntIndexOutput.CODEC, SingleIntIndexOutput.VERSION_START);
+  }
+
+  // Each Reader gets its own clone so multiple readers can be
+  // positioned independently over the same underlying file.
+  public Reader reader() throws IOException {
+    return new Reader((IndexInput) in.clone());
+  }
+
+  public void close() throws IOException {
+    in.close();
+  }
+
+  public static class Reader extends IntIndexInput.Reader {
+    // clone:
+    private final IndexInput in;
+
+    private final BulkReadResult result = new BulkReadResult();
+
+    public Reader(IndexInput in) {
+      this.in = in;
+      result.offset = 0;
+    }
+
+    /** Reads next single int */
+    public int next() throws IOException {
+      return in.readVInt();
+    }
+
+    /** Reads next chunk of ints.  The returned result aliases the
+     *  caller's buffer; offset is always 0. */
+    public BulkReadResult read(int[] buffer, int count) throws IOException {
+      result.buffer = buffer;
+      for(int i=0;i<count;i++) {
+        buffer[i] = in.readVInt();
+      }
+      result.len = count;
+      return result;
+    }
+
+    public String descFilePointer() {
+      return Long.toString(in.getFilePointer());
+    }
+  }
+
+  private static class Index extends IntIndexInput.Index {
+    private long fp;
+    // Guards (via assert) against reading a relative delta before any
+    // absolute position has been read or copied in via set().
+    boolean first = true;
+
+    public void read(IndexInput indexIn, boolean absolute)
+      throws IOException {
+      long cur = fp;
+      if (absolute) {
+        fp = indexIn.readVLong();
+        first = false;
+      } else {
+        assert !first;
+        fp += indexIn.readVLong();
+      }
+      if (Codec.DEBUG) {
+        System.out.println("siii.idx.read: id=" + desc + " abs=" + absolute + " now=" + fp + " delta=" + (fp-cur));
+      }
+    }
+
+    public void set(IntIndexInput.Index other) {
+      fp = ((Index) other).fp;
+      first = false;
+    }
+
+    public void seek(IntIndexInput.Reader other) throws IOException {
+      ((Reader) other).in.seek(fp);
+    }
+
+    public String toString() {
+      return Long.toString(fp);
+    }
+  }
+
+  public Index index() {
+    return new Index();
+  }
+}
+

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexInput.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexOutput.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexOutput.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexOutput.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,87 @@
+package org.apache.lucene.index.codecs.sep;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.index.codecs.Codec;
+
+import java.io.IOException;
+
+/** Writes ints directly to the file (not in blocks) as
+ *  vInt */
+
+public class SingleIntIndexOutput extends IntIndexOutput {
+  private final IndexOutput out;
+  final static String CODEC = "SINGLE_INTS";
+  final static int VERSION_START = 0;
+  final static int VERSION_CURRENT = VERSION_START;
+  private long markPosition;
+  private long lastSavePosition;
+
+  public SingleIntIndexOutput(Directory dir, String fileName) throws IOException {
+    out = dir.createOutput(fileName);
+    Codec.writeHeader(out, CODEC, VERSION_CURRENT);
+  }
+
+  /** Write an int to the primary file */
+  public void write(int v) throws IOException {
+    out.writeVInt(v);
+  }
+
+  public Index index() {
+    return new Index();
+  }
+
+  public void close() throws IOException {
+    out.close();
+  }
+
+  public String descFilePointer() {
+    return Long.toString(out.getFilePointer());
+  }
+
+  private class Index extends IntIndexOutput.Index {
+    long fp;
+    long lastFP;
+    public void mark() {
+      fp = out.getFilePointer();
+      if (Codec.DEBUG) {
+        System.out.println("siio.idx.mark id=" + desc + " fp=" + fp);
+      }
+    }
+    public void set(IntIndexOutput.Index other) {
+      lastFP = fp = ((Index) other).fp;
+    }
+    public void write(IndexOutput indexOut, boolean absolute)
+      throws IOException {
+      if (Codec.DEBUG) {
+        System.out.println("siio.idx.write id=" + desc + " fp=" + fp + " abs=" + absolute + " delta=" + (fp-lastFP));
+      }
+      if (absolute) {
+        indexOut.writeVLong(fp);
+      } else {
+        indexOut.writeVLong(fp - lastFP);
+      }
+      lastFP = fp;
+    }
+    public String toString() {
+      return Long.toString(fp);
+    }
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexOutput.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DefaultSkipListReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DefaultSkipListReader.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DefaultSkipListReader.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DefaultSkipListReader.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,121 @@
+package org.apache.lucene.index.codecs.standard;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.index.codecs.MultiLevelSkipListReader;
+import org.apache.lucene.store.IndexInput;
+
+/**
+ * Implements the skip list reader for the default posting list format
+ * that stores positions and payloads.
+ *
+ */
+// nocommit -- made public
+public class DefaultSkipListReader extends MultiLevelSkipListReader {
+  private boolean currentFieldStoresPayloads;
+  // Per-level file pointers / payload lengths, advanced as skip data is read.
+  private long freqPointer[];
+  private long proxPointer[];
+  private int payloadLength[];
+  
+  // Values at the skip point reached by the last skipTo(); exposed via getters.
+  private long lastFreqPointer;
+  private long lastProxPointer;
+  private int lastPayloadLength;
+                           
+
+  // nocommit -- made public
+  public DefaultSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval) {
+    super(skipStream, maxSkipLevels, skipInterval);
+    freqPointer = new long[maxSkipLevels];
+    proxPointer = new long[maxSkipLevels];
+    payloadLength = new int[maxSkipLevels];
+  }
+
+  // nocommit -- made public
+  public void init(long skipPointer, long freqBasePointer, long proxBasePointer, int df, boolean storesPayloads) {
+    super.init(skipPointer, df);
+    this.currentFieldStoresPayloads = storesPayloads;
+    lastFreqPointer = freqBasePointer;
+    lastProxPointer = proxBasePointer;
+
+    Arrays.fill(freqPointer, freqBasePointer);
+    Arrays.fill(proxPointer, proxBasePointer);
+    Arrays.fill(payloadLength, 0);
+  }
+
+  /** Returns the freq pointer of the doc to which the last call of 
+   * {@link MultiLevelSkipListReader#skipTo(int)} has skipped.  */
+  // nocommit made public
+  public long getFreqPointer() {
+    return lastFreqPointer;
+  }
+
+  /** Returns the prox pointer of the doc to which the last call of 
+   * {@link MultiLevelSkipListReader#skipTo(int)} has skipped.  */
+  // nocommit made public
+  public long getProxPointer() {
+    return lastProxPointer;
+  }
+  
+  /** Returns the payload length of the payload stored just before 
+   * the doc to which the last call of {@link MultiLevelSkipListReader#skipTo(int)} 
+   * has skipped.  */
+  // nocommit made public
+  public int getPayloadLength() {
+    return lastPayloadLength;
+  }
+  
+  // Seed the child level's state from where the parent level stopped.
+  protected void seekChild(int level) throws IOException {
+    super.seekChild(level);
+    freqPointer[level] = lastFreqPointer;
+    proxPointer[level] = lastProxPointer;
+    payloadLength[level] = lastPayloadLength;
+  }
+  
+  protected void setLastSkipData(int level) {
+    super.setLastSkipData(level);
+    lastFreqPointer = freqPointer[level];
+    lastProxPointer = proxPointer[level];
+    lastPayloadLength = payloadLength[level];
+  }
+
+
+  // Decodes one skip datum (mirror of DefaultSkipListWriter.writeSkipData)
+  // and returns the doc delta.
+  protected int readSkipData(int level, IndexInput skipStream) throws IOException {
+    int delta;
+    if (currentFieldStoresPayloads) {
+      // the current field stores payloads.
+      // if the doc delta is odd then we have
+      // to read the current payload length
+      // because it differs from the length of the
+      // previous payload
+      delta = skipStream.readVInt();
+      if ((delta & 1) != 0) {
+        payloadLength[level] = skipStream.readVInt();
+      }
+      delta >>>= 1;
+    } else {
+      delta = skipStream.readVInt();
+    }
+    freqPointer[level] += skipStream.readVInt();
+    proxPointer[level] += skipStream.readVInt();
+    
+    return delta;
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DefaultSkipListReader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DefaultSkipListWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DefaultSkipListWriter.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DefaultSkipListWriter.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DefaultSkipListWriter.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,149 @@
+package org.apache.lucene.index.codecs.standard;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.MultiLevelSkipListWriter;
+
+
+/**
+ * Implements the skip list writer for the default posting list format
+ * that stores positions and payloads.
+ *
+ */
+// nocommit -- made public
+public class DefaultSkipListWriter extends MultiLevelSkipListWriter {
+  // Per-level state from the previous skip point, so each datum is a delta.
+  private int[] lastSkipDoc;
+  private int[] lastSkipPayloadLength;
+  private long[] lastSkipFreqPointer;
+  private long[] lastSkipProxPointer;
+  
+  private IndexOutput freqOutput;
+  // nocommit -- private again
+  public IndexOutput proxOutput;
+
+  // State captured by setSkipData() for the next skip point.
+  private int curDoc;
+  private boolean curStorePayloads;
+  private int curPayloadLength;
+  private long curFreqPointer;
+  private long curProxPointer;
+
+  // nocommit made public
+  public DefaultSkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount, IndexOutput freqOutput, IndexOutput proxOutput) {
+    super(skipInterval, numberOfSkipLevels, docCount);
+    this.freqOutput = freqOutput;
+    this.proxOutput = proxOutput;
+    
+    lastSkipDoc = new int[numberOfSkipLevels];
+    lastSkipPayloadLength = new int[numberOfSkipLevels];
+    lastSkipFreqPointer = new long[numberOfSkipLevels];
+    lastSkipProxPointer = new long[numberOfSkipLevels];
+  }
+
+  // nocommit -- made public
+  public void setFreqOutput(IndexOutput freqOutput) {
+    this.freqOutput = freqOutput;
+  }
+
+  // nocommit -- made public
+  public void setProxOutput(IndexOutput proxOutput) {
+    this.proxOutput = proxOutput;
+  }
+
+  /**
+   * Sets the values for the current skip data. 
+   */
+  // nocommit -- made public
+  public void setSkipData(int doc, boolean storePayloads, int payloadLength) {
+    this.curDoc = doc;
+    this.curStorePayloads = storePayloads;
+    this.curPayloadLength = payloadLength;
+    this.curFreqPointer = freqOutput.getFilePointer();
+    if (proxOutput != null)
+      this.curProxPointer = proxOutput.getFilePointer();
+  }
+
+  // Called at the start of a new term: re-bases all levels at the
+  // current output file pointers.
+  // nocommit -- made public
+  public void resetSkip() {
+    super.resetSkip();
+    Arrays.fill(lastSkipDoc, 0);
+    Arrays.fill(lastSkipPayloadLength, -1);  // we don't have to write the first length in the skip list
+    Arrays.fill(lastSkipFreqPointer, freqOutput.getFilePointer());
+    if (proxOutput != null)
+      Arrays.fill(lastSkipProxPointer, proxOutput.getFilePointer());
+    if (Codec.DEBUG) {
+      if (proxOutput != null)
+        System.out.println("    skip writer base freqFP=" + freqOutput.getFilePointer() + " proxFP=" + proxOutput.getFilePointer());
+      else
+        System.out.println("    skip writer base freqFP=" + freqOutput.getFilePointer());
+    }
+  }
+  
+  protected void writeSkipData(int level, IndexOutput skipBuffer) throws IOException {
+    // To efficiently store payloads in the posting lists we do not store the length of
+    // every payload. Instead we omit the length for a payload if the previous payload had
+    // the same length.
+    // However, in order to support skipping the payload length at every skip point must be known.
+    // So we use the same length encoding that we use for the posting lists for the skip data as well:
+    // Case 1: current field does not store payloads
+    //           SkipDatum                 --> DocSkip, FreqSkip, ProxSkip
+    //           DocSkip,FreqSkip,ProxSkip --> VInt
+    //           DocSkip records the document number before every SkipInterval th  document in TermFreqs. 
+    //           Document numbers are represented as differences from the previous value in the sequence.
+    // Case 2: current field stores payloads
+    //           SkipDatum                 --> DocSkip, PayloadLength?, FreqSkip,ProxSkip
+    //           DocSkip,FreqSkip,ProxSkip --> VInt
+    //           PayloadLength             --> VInt    
+    //         In this case DocSkip/2 is the difference between
+    //         the current and the previous value. If DocSkip
+    //         is odd, then a PayloadLength encoded as VInt follows,
+    //         if DocSkip is even, then it is assumed that the
+    //         current payload length equals the length at the previous
+    //         skip point
+    if (curStorePayloads) {
+      int delta = curDoc - lastSkipDoc[level];
+      if (curPayloadLength == lastSkipPayloadLength[level]) {
+        // the current payload length equals the length at the previous skip point,
+        // so we don't store the length again
+        skipBuffer.writeVInt(delta * 2);
+      } else {
+        // the payload length is different from the previous one. We shift the DocSkip, 
+        // set the lowest bit and store the current payload length as VInt.
+        skipBuffer.writeVInt(delta * 2 + 1);
+        skipBuffer.writeVInt(curPayloadLength);
+        lastSkipPayloadLength[level] = curPayloadLength;
+      }
+    } else {
+      // current field does not store payloads
+      skipBuffer.writeVInt(curDoc - lastSkipDoc[level]);
+    }
+    skipBuffer.writeVInt((int) (curFreqPointer - lastSkipFreqPointer[level]));
+    // NOTE(review): when proxOutput is null curProxPointer stays 0 and a
+    // 0 prox delta is still written here -- confirm the reader tolerates it.
+    skipBuffer.writeVInt((int) (curProxPointer - lastSkipProxPointer[level]));
+
+    lastSkipDoc[level] = curDoc;
+    //System.out.println("write doc at level " + level + ": " + curDoc);
+    
+    lastSkipFreqPointer[level] = curFreqPointer;
+    lastSkipProxPointer[level] = curProxPointer;
+  }
+
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DefaultSkipListWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DeltaBytesReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DeltaBytesReader.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DeltaBytesReader.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DeltaBytesReader.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,54 @@
+package org.apache.lucene.index.codecs.standard;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.index.TermRef;
+
+import java.io.IOException;
+
+// Handles reading incremental UTF8 encoded terms
+final class DeltaBytesReader {
+  // nocommit: was final
+  TermRef term = new TermRef();  // current term; its byte buffer grows as needed
+  final IndexInput in;
+  boolean started;
+
+  DeltaBytesReader(IndexInput in) {
+    this.in = in;
+    term.bytes = new byte[10];
+  }
+
+  // Re-seeds the current term (e.g. after a seek) so the next delta
+  // decodes against it.
+  void reset(TermRef text) {
+    term.copy(text);
+  }
+
+  // Decodes one term: a shared-prefix length, a suffix length, then the
+  // suffix bytes, appended onto the previous term's prefix.
+  void read() throws IOException {
+    // mxx
+    //System.out.println(Thread.currentThread().getName() + ":  dbr termFP=" + in.getFilePointer());
+    final int start = in.readVInt();
+    final int suffix = in.readVInt();
+    // mxx
+    //System.out.println(Thread.currentThread().getName() + ":  start=" + start + " suffix=" + suffix);
+    assert start <= term.length: "start=" + start + " length=" + term.length;
+    final int newLength = start+suffix;
+    term.grow(newLength);
+    in.readBytes(term.bytes, start, suffix);
+    term.length = newLength;
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DeltaBytesReader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DeltaBytesWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DeltaBytesWriter.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DeltaBytesWriter.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DeltaBytesWriter.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,64 @@
+package org.apache.lucene.index.codecs.standard;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.store.IndexOutput;
+
+import java.io.IOException;
+
+// Writes terms incrementally: each term is encoded as the length of the
+// prefix shared with the previous term, the suffix length, and the
+// suffix bytes (mirror of DeltaBytesReader).
+final class DeltaBytesWriter {
+
+  private final UnicodeUtil.UTF8Result utf8 = new UnicodeUtil.UTF8Result();
+
+  // Copy of the previously written term, used to compute the shared prefix.
+  private byte[] lastBytes = new byte[10];
+  private int lastLength;
+  final IndexOutput out;
+
+  DeltaBytesWriter(IndexOutput out) {
+    this.out = out;
+  }
+
+  // Forgets the previous term so the next write emits a full term
+  // (prefix length 0).
+  void reset() {
+    lastLength = 0;
+  }
+
+  void write(byte[] bytes, int length) throws IOException {
+    // Find the length of the prefix shared with the previous term.
+    int start = 0;
+    final int limit = length < lastLength ? length : lastLength;
+    while(start < limit) {
+      if (bytes[start] != lastBytes[start])
+        break;
+      start++;
+    }
+
+    final int suffix = length - start;
+    // mxx
+    //System.out.println(Thread.currentThread().getName() + ":  dbw start=" + start + " suffix=" + suffix + " outFP=" + out.getFilePointer());
+
+    out.writeVInt(start);                       // prefix
+    out.writeVInt(suffix);                      // suffix
+    out.writeBytes(bytes, start, suffix);
+    if (lastBytes.length < bytes.length) {
+      lastBytes = ArrayUtil.grow(lastBytes, bytes.length);
+    }
+    // lastBytes[0..start) already matches, so copying the suffix suffices.
+    System.arraycopy(bytes, start, lastBytes, start, suffix);
+    lastLength = length;
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DeltaBytesWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,457 @@
+package org.apache.lucene.index.codecs.standard;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.TermRef;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.util.ArrayUtil;
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Collection;
+import java.io.IOException;
+
+/**
+ * Uses a simplistic format to record terms dict index
+ * information.  Limitations:
+ *
+ *   - Index for all fields is loaded entirely into RAM up
+ *     front 
+ *   - Index is stored in RAM using shared byte[] that
+ *     wastefully expands every term.  Using an FST to share
+ *     common prefix & suffix would save RAM.
+ *   - Index is taken at regular numTerms (every 128 by
+ *     default); might be better to do it by "net docFreqs"
+ *     encountered, so that for spans of low-freq terms we
+ *     take index less often.
+ *
+ * A better approach might be something similar to how
+ * postings are encoded, w/ multi-level skips.  Ie, load all
+ * terms index data into memory, as a single large compactly
+ * encoded stream (eg delta bytes + delta offset).  Index
+ * that w/ multi-level skipper.  Then to look up a term is
+ * the equivalent binary search, using the skipper instead,
+ * while data remains compressed in memory.
+ */
+
+import org.apache.lucene.index.IndexFileNames;
+
+/**
+ * Reads the terms-dict index written by
+ * SimpleStandardTermsIndexWriter.  The index for all fields is
+ * held entirely in RAM in shared byte blocks; loading may be
+ * deferred (indexDivisor == -1) until {@link #loadTermsIndex}
+ * is called.
+ */
+public class SimpleStandardTermsIndexReader extends StandardTermsIndexReader {
+
+  // Effective interval between terms kept in RAM:
+  // indexInterval * indexDivisor (just indexInterval when
+  // loading is deferred)
+  final private int totalIndexInterval;
+
+  // Keep only every indexDivisor'th indexed term; -1 means
+  // defer loading the index until loadTermsIndex() is called
+  final private int indexDivisor;
+
+  // Interval at which the writer recorded index terms
+  final private int indexInterval;
+
+  // Non-null only while the index load is deferred
+  final private IndexInput in;
+  private volatile boolean indexLoaded;
+
+  final HashMap<FieldInfo,FieldIndexReader> fields = new HashMap<FieldInfo,FieldIndexReader>();
+
+  public SimpleStandardTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor)
+    throws IOException {
+
+    IndexInput in = dir.openInput(IndexFileNames.segmentFileName(segment, StandardCodec.TERMS_INDEX_EXTENSION));
+
+    try {
+      Codec.checkHeader(in, SimpleStandardTermsIndexWriter.CODEC_NAME, SimpleStandardTermsIndexWriter.VERSION_START);
+
+      if (Codec.DEBUG) {
+        System.out.println(" readDirStart @ " + in.getFilePointer());
+      }
+
+      final long dirOffset = in.readLong();
+
+      indexInterval = in.readInt();
+      this.indexDivisor = indexDivisor;
+
+      if (indexDivisor == -1) {
+        totalIndexInterval = indexInterval;
+      } else {
+        // In case terms index gets loaded, later, on demand
+        totalIndexInterval = indexInterval * indexDivisor;
+      }
+
+      // Read directory
+      in.seek(dirOffset);
+
+      final int numFields = in.readInt();
+
+      if (Codec.DEBUG) {
+        System.out.println("sstir create seg=" + segment + " numFields=" + numFields + " dirStart=" + dirOffset);
+      }
+
+      for(int i=0;i<numFields;i++) {
+        final int field = in.readInt();
+        if (Codec.DEBUG) {
+          System.out.println("  read field number=" + field);
+        }
+        final int numIndexTerms = in.readInt();
+        final long indexStart = in.readLong();
+        if (numIndexTerms > 0) {
+          final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+          fields.put(fieldInfo, new FieldIndexReader(in, fieldInfo, numIndexTerms, indexStart));
+        }
+      }
+    } finally {
+      if (indexDivisor != -1) {
+        // Index was loaded eagerly above; we are done with the file
+        in.close();
+        indexLoaded = true;
+        this.in = null;
+      } else {
+        this.in = in;
+        // nocommit -- we should close if index gets read on demand?
+      }
+    }
+  }
+
+  // Fixed size byte blocks, to hold all term bytes; these
+  // blocks are shared across fields
+  private byte[][] blocks;
+  // Index of the block currently being filled
+  int blockUpto;
+  // Write position within blocks[blockUpto]
+  int blockOffset;
+
+  private static final int BYTE_BLOCK_SHIFT = 15;
+  private static final int BYTE_BLOCK_SIZE = 1 << BYTE_BLOCK_SHIFT;
+  private static final int BYTE_BLOCK_MASK = BYTE_BLOCK_SIZE - 1;
+
+  /** Per-field view of the in-RAM index; the actual arrays live
+   *  in {@link CoreFieldIndex}, created eagerly or on demand. */
+  private final class FieldIndexReader extends FieldReader {
+
+    final private FieldInfo fieldInfo;
+
+    // volatile: may be published by loadTermsIndex() after construction
+    private volatile CoreFieldIndex coreIndex;
+
+    private final IndexInput in;
+
+    private final long indexStart;
+
+    private final int numIndexTerms;
+
+    public FieldIndexReader(IndexInput in, FieldInfo fieldInfo, int numIndexTerms, long indexStart) throws IOException {
+
+      this.fieldInfo = fieldInfo;
+      this.in = in;
+      this.indexStart = indexStart;
+      this.numIndexTerms = numIndexTerms;
+
+      // We still create the indexReader when indexDivisor
+      // is -1, so that StandardTermsDictReader can call
+      // isIndexTerm for each field:
+      if (indexDivisor != -1) {
+
+        if (Codec.DEBUG) {
+          System.out.println("read index for field=" + fieldInfo.name + " numIndexTerms=" + numIndexTerms + " indexDivisor=" + indexDivisor + " indexFP=" + indexStart);
+        }
+
+        coreIndex = new CoreFieldIndex(indexStart,
+                                       numIndexTerms);
+      
+      } else {
+        if (Codec.DEBUG) {
+          System.out.println("skip read index for field=" + fieldInfo.name + " numIndexTerms=" + numIndexTerms + " indexDivisor=" + indexDivisor);
+        }
+      }
+    }
+
+    /** Loads the index arrays if construction deferred them. */
+    public void loadTermsIndex() throws IOException {
+      if (coreIndex == null) {
+        coreIndex = new CoreFieldIndex(indexStart, numIndexTerms);
+      }
+    }
+
+    public boolean isIndexTerm(int position, int docFreq) {
+      return position % totalIndexInterval == 0;
+    }
+
+    public final void getIndexOffset(TermRef term, TermsIndexResult result) throws IOException {
+      // You must call loadTermsIndex if you had specified -1 for indexDivisor
+      if (coreIndex == null) {
+        throw new IllegalStateException("terms index was not loaded");
+      }
+      coreIndex.getIndexOffset(term, result);
+    }
+
+    public final void getIndexOffset(long ord, TermsIndexResult result) throws IOException {
+      // You must call loadTermsIndex if you had specified -1 for indexDivisor
+      if (coreIndex == null) {
+        throw new IllegalStateException("terms index was not loaded");
+      }
+      coreIndex.getIndexOffset(ord, result);
+    }
+
+    /** Holds the loaded index for one field: every kept term's
+     *  bytes (in the shared blocks), its terms-dict file offset
+     *  and its length. */
+    private final class CoreFieldIndex {
+
+      // TODO: use packed ints here
+      // Pointer into terms dict file that we are indexing
+      final long[] fileOffset;
+
+      // TODO: use packed ints here
+      // For each term, points to start of term's bytes within
+      // block.
+      // TODO: wasteful that this is always long; many terms
+      // dict indexes obviously don't require so much address
+      // space; since we know up front during indexing how
+      // much space is needed we could pack this to the
+      // precise # bits
+      final long[] blockPointer;
+    
+      // Length of each term
+      // nocommit -- this is length in bytes; is short
+      // sufficient?  have to use negative space?
+      // TODO: use packed ints here: we know max term
+      // length; often its small
+      final short[] termLength;
+
+      final int numIndexTerms;
+
+      CoreFieldIndex(long indexStart, int numIndexTerms) throws IOException {
+
+        IndexInput clone = (IndexInput) in.clone();
+        try {
+          clone.seek(indexStart);
+
+          if (indexDivisor == -1) {
+            // Special case: we are being loaded inside
+            // IndexWriter because a SegmentReader that at
+            // first was opened for merging, is now being
+            // opened to perform deletes or for an NRT reader
+
+            // nocommit -- how to allow apps to indexDivisor
+            // in this case?
+            this.numIndexTerms = numIndexTerms;
+          } else {
+            this.numIndexTerms = 1+(numIndexTerms-1) / indexDivisor;
+          }
+
+          assert this.numIndexTerms  > 0: "numIndexTerms=" + numIndexTerms + " indexDivisor=" + indexDivisor;
+
+          if (blocks == null) {
+            blocks = new byte[1][];
+            blocks[0] = new byte[BYTE_BLOCK_SIZE];
+          }
+
+          // Location of the previously loaded term, used as the
+          // source of each term's shared prefix
+          byte[] lastBlock = blocks[blockUpto];
+          int lastBlockOffset = blockOffset;
+
+          fileOffset = new long[this.numIndexTerms];
+          blockPointer = new long[this.numIndexTerms];
+          termLength = new short[this.numIndexTerms];
+
+          final byte[] skipBytes;
+          if (indexDivisor != 1) {
+            // only need skipBytes (below) if we are not
+            // loading all index terms
+            skipBytes = new byte[128];
+          } else {
+            skipBytes = null;
+          }
+
+          int upto = 0;
+          long pointer = 0;
+      
+          for(int i=0;i<numIndexTerms;i++) {
+            final int start = clone.readVInt();
+            final int suffix = clone.readVInt();
+            final int thisTermLength = start + suffix;
+
+            // nocommit -- verify this is in fact guaranteed by
+            // DW -- we are talking bytes not chars here
+            assert thisTermLength <= BYTE_BLOCK_SIZE;
+
+            if (i%indexDivisor == 0) {
+              // Keeper
+              if (blockOffset + thisTermLength > BYTE_BLOCK_SIZE) {
+                // Term does not fit in the current block: start
+                // a new one, first growing the blocks array if
+                // it has no room for another block.  (We must
+                // advance blockUpto before storing the new
+                // block, so blocks already holding term bytes
+                // are never overwritten.)
+                if (blockUpto+1 == blocks.length) {
+                  final int newSize = ArrayUtil.getNextSize(blockUpto+2);
+                  final byte[][] newBlocks = new byte[newSize][];
+                  System.arraycopy(blocks, 0, newBlocks, 0, blocks.length);
+                  blocks = newBlocks;
+                }
+                blockUpto++;
+                blocks[blockUpto] = new byte[BYTE_BLOCK_SIZE];
+                blockOffset = 0;
+              }
+
+              final byte[] block = blocks[blockUpto];
+
+              // Copy old prefix
+              assert lastBlock != null || start == 0;
+              assert block != null;
+              System.arraycopy(lastBlock, lastBlockOffset, block, blockOffset, start);
+
+              // Read new suffix
+              clone.readBytes(block, blockOffset+start, suffix);
+
+              // Advance file offset
+              pointer += clone.readVLong();
+
+              assert thisTermLength < Short.MAX_VALUE;
+
+              termLength[upto] = (short) thisTermLength;
+              fileOffset[upto] = pointer;
+              // widen before multiplying: the int product would
+              // overflow once total term bytes pass 2GB
+              blockPointer[upto] = ((long) blockUpto) * BYTE_BLOCK_SIZE + blockOffset;
+
+              lastBlock = block;
+              lastBlockOffset = blockOffset;
+              blockOffset += thisTermLength;
+              upto++;
+            } else {
+              // Not one of the terms we keep in RAM
+              // (indexDivisor > 1): skip over its suffix bytes
+              int toSkip = suffix;
+              while(true) {
+                if (toSkip > skipBytes.length) {
+                  clone.readBytes(skipBytes, 0, skipBytes.length);
+                  toSkip -= skipBytes.length;
+                } else {
+                  clone.readBytes(skipBytes, 0, toSkip);
+                  break;
+                }
+              }
+
+              // Advance file offset
+              pointer += clone.readVLong();
+            }
+          }
+
+          assert upto == this.numIndexTerms;
+
+          if (Codec.DEBUG) {
+            System.out.println("  done read");
+          }
+        } finally {
+          // always release the cloned input, even on exception
+          clone.close();
+        }
+      }
+
+      /** Fills {@code result} with the greatest indexed term
+       *  that is &lt;= {@code term} (or the first indexed term
+       *  if {@code term} sorts before all of them). */
+      public final void getIndexOffset(TermRef term, TermsIndexResult result) throws IOException {
+
+        if (Codec.DEBUG) {
+          System.out.println("getIndexOffset field=" + fieldInfo.name + " term=" + term + " indexLen = " + blockPointer.length + " numIndexTerms=" + fileOffset.length + " this=" + this);
+        }
+
+        int lo = 0;					  // binary search
+        int hi = fileOffset.length - 1;
+
+        while (hi >= lo) {
+          // unsigned shift: immune to (lo+hi) int overflow
+          int mid = (lo + hi) >>> 1;
+
+          final long loc = blockPointer[mid];
+          result.term.bytes = blocks[(int) (loc >> BYTE_BLOCK_SHIFT)];
+          result.term.offset = (int) (loc & BYTE_BLOCK_MASK);
+          result.term.length = termLength[mid];
+
+          int delta = term.compareTerm(result.term);
+          if (delta < 0) {
+            hi = mid - 1;
+          } else if (delta > 0) {
+            lo = mid + 1;
+          } else {
+            // Exact match
+            assert mid >= 0;
+            result.position = mid*totalIndexInterval;
+            result.offset = fileOffset[mid];
+            return;
+          }
+        }
+        // No exact match: hi is now the floor entry, or -1 if
+        // term sorts before all indexed terms
+        if (hi < 0) {
+          assert hi == -1;
+          hi = 0;
+        }
+
+        final long loc = blockPointer[hi];
+        result.term.bytes = blocks[(int) (loc >> BYTE_BLOCK_SHIFT)];
+        result.term.offset = (int) (loc & BYTE_BLOCK_MASK);
+        result.term.length = termLength[hi];
+
+        result.position = hi*totalIndexInterval;
+        result.offset = fileOffset[hi];
+      }
+
+      /** Fills {@code result} with the indexed term covering
+       *  term ordinal {@code ord}. */
+      public final void getIndexOffset(long ord, TermsIndexResult result) throws IOException {
+        int idx = (int) (ord / totalIndexInterval);
+        // caller must ensure ord is in bounds
+        assert idx < numIndexTerms;
+
+        final long loc = blockPointer[idx];
+        result.term.bytes = blocks[(int) (loc >> BYTE_BLOCK_SHIFT)];
+        result.term.offset = (int) (loc & BYTE_BLOCK_MASK);
+        result.term.length = termLength[idx];
+        result.position = idx * totalIndexInterval;
+        result.offset = fileOffset[idx];
+      }
+    }
+  }
+
+  /** Loads all fields' indexes if construction deferred them
+   *  (indexDivisor == -1); no-op otherwise. */
+  public void loadTermsIndex() throws IOException {
+
+    if (!indexLoaded) {
+
+      if (Codec.DEBUG) {
+        System.out.println(Thread.currentThread().getName() + ": sstir: load coreIndex on demand");
+      }
+
+      for(FieldIndexReader reader : fields.values()) {
+        reader.loadTermsIndex();
+      }
+      indexLoaded = true;
+    }
+  }
+
+  public FieldReader getField(FieldInfo fieldInfo) {
+    return fields.get(fieldInfo);
+  }
+
+  /** Adds the index file this reader uses for {@code info} to {@code files}. */
+  public static void files(SegmentInfo info, Collection files) {
+    files.add(IndexFileNames.segmentFileName(info.name, StandardCodec.TERMS_INDEX_EXTENSION));
+  }
+
+  public static void getIndexExtensions(Collection extensions) {
+    extensions.add(StandardCodec.TERMS_INDEX_EXTENSION);
+  }
+
+  public void getExtensions(Collection extensions) {
+    getIndexExtensions(extensions);
+  }
+
+  public void close() throws IOException {
+    if (in != null) {
+      in.close();
+    }
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,137 @@
+package org.apache.lucene.index.codecs.standard;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.codecs.Codec;
+
+import java.util.List;
+import java.util.ArrayList;
+import java.io.IOException;
+
+/**
+ * Writes a simple terms-dict index: every
+ * termIndexInterval'th term is recorded, front coded via
+ * DeltaBytesWriter, along with the delta of its file pointer
+ * into the terms dict.  Read back by
+ * SimpleStandardTermsIndexReader.
+ */
+public class SimpleStandardTermsIndexWriter extends StandardTermsIndexWriter {
+  final private IndexOutput out;
+
+  final static String CODEC_NAME = "SIMPLE_STANDARD_TERMS_INDEX";
+  final static int VERSION_START = 0;
+  final static int VERSION_CURRENT = VERSION_START;
+
+  // Record every termIndexInterval'th term in the index
+  final private int termIndexInterval;
+
+  private final List<SimpleFieldWriter> fields = new ArrayList<SimpleFieldWriter>();
+  private final FieldInfos fieldInfos;
+
+  // Terms dict output; index entries record deltas of its file pointer
+  private IndexOutput termsOut;
+
+  // nocommit
+  final private String segment;
+
+  public SimpleStandardTermsIndexWriter(SegmentWriteState state) throws IOException {
+    final String indexFileName = IndexFileNames.segmentFileName(state.segmentName, StandardCodec.TERMS_INDEX_EXTENSION);
+    state.flushedFiles.add(indexFileName);
+    this.segment = state.segmentName;
+    termIndexInterval = state.termIndexInterval;
+    out = state.directory.createOutput(indexFileName);
+    Codec.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
+    fieldInfos = state.fieldInfos;
+
+    // Placeholder for dir offset; close() seeks back and fills it in
+    out.writeLong(0);
+    out.writeInt(termIndexInterval);
+    termWriter = new DeltaBytesWriter(out);
+  }
+
+  @Override
+  public void setTermsOutput(IndexOutput termsOut) {
+    this.termsOut = termsOut;
+  }
+  
+  final private DeltaBytesWriter termWriter;
+
+  public FieldWriter addField(FieldInfo field) {
+    SimpleFieldWriter writer = new SimpleFieldWriter(field);
+    fields.add(writer);
+    return writer;
+  }
+
+  private class SimpleFieldWriter extends FieldWriter {
+    final FieldInfo fieldInfo;
+    // How many terms have been recorded in the index so far
+    int numIndexTerms;
+    // termsOut file pointer of the last indexed term, for delta coding
+    private long lastTermsPointer;
+    // Where this field's index entries begin in the index file
+    final long indexStart;
+    // Total terms seen so far, indexed or not
+    private int numTerms;
+
+    SimpleFieldWriter(FieldInfo fieldInfo) {
+      this.fieldInfo = fieldInfo;
+      indexStart = out.getFilePointer();
+      // reset delta coding so the field's first term is written in full
+      termWriter.reset();
+    }
+
+    /**
+     * Called once per term, in sorted order; records every
+     * termIndexInterval'th term.  Returns true if this term
+     * was recorded in the index.
+     */
+    public boolean checkIndexTerm(byte[] term, int termLength, int docFreq) throws IOException {
+      // First term is first indexed term:
+      if (0 == (numTerms++ % termIndexInterval)) {
+        final long termsPointer = termsOut.getFilePointer();
+        if (Codec.DEBUG) {
+          System.out.println("sstiw.checkIndexTerm write index field=" + fieldInfo.name + " term=" + new String(term, 0, termLength, "UTF-8") + " termsFP=" + termsPointer + " numIndexTerms=" + numIndexTerms + " outFP=" + out.getFilePointer());
+        }
+        termWriter.write(term, termLength);
+        out.writeVLong(termsPointer - lastTermsPointer);
+        lastTermsPointer = termsPointer;
+        numIndexTerms++;
+        return true;
+      } else {
+        return false;
+      }
+    }
+  }
+
+  /**
+   * Writes the per-field directory at the end of the file,
+   * seeks back to record its offset in the header, and closes
+   * the output.
+   */
+  public void close() throws IOException {
+    try {
+      final long dirStart = out.getFilePointer();
+      if (Codec.DEBUG) {
+        System.out.println("sstiw.close seg=" + segment + " dirStart=" + dirStart);
+      }
+      final int fieldCount = fields.size();
+
+      out.writeInt(fieldCount);
+      for(int i=0;i<fieldCount;i++) {
+        SimpleFieldWriter field = fields.get(i);
+        if (Codec.DEBUG) {
+          System.out.println("sstiw.close save field=" + field.fieldInfo.name + " numIndexTerms=" + field.numIndexTerms);
+        }
+        out.writeInt(field.fieldInfo.number);
+        out.writeInt(field.numIndexTerms);
+        out.writeLong(field.indexStart);
+      }
+      // Fill in the directory-offset placeholder written by the ctor
+      out.seek(Codec.headerSize(CODEC_NAME));
+      // nocommit -- why not simply write last 8 bytes of
+      // file?  hmm would require accurate filelength() in
+      // reader
+      out.writeLong(dirStart);
+      if (Codec.DEBUG) {
+        System.out.println(" writeDirStart " + dirStart + " @ " + Codec.headerSize(CODEC_NAME));
+      }
+    } finally {
+      // release the file handle even if a directory write failed
+      out.close();
+    }
+  }
+}
\ No newline at end of file

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message