lucene-java-commits mailing list archives

From mikemcc...@apache.org
Subject svn commit: r927234 - in /lucene/java/branches/flex_1458: ./ src/java/org/apache/lucene/index/ src/java/org/apache/lucene/index/codecs/ src/java/org/apache/lucene/index/codecs/standard/ src/java/org/apache/lucene/util/ src/java/org/apache/lucene/util/p...
Date Thu, 25 Mar 2010 00:03:17 GMT
Author: mikemccand
Date: Thu Mar 25 00:03:16 2010
New Revision: 927234

URL: http://svn.apache.org/viewvc?rev=927234&view=rev
Log:
LUCENE-2321: cutover to packed ints for in-memory terms dict index

Added:
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/PagedBytes.java   (with props)
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java   (with props)
Modified:
    lucene/java/branches/flex_1458/CHANGES.txt
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/CheckIndex.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/TermsConsumer.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsIndexWriter.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/ArrayUtil.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/Packed32.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/Packed64.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/PackedInts.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/PackedWriter.java
    lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestStressIndexing2.java
    lucene/java/branches/flex_1458/src/test/org/apache/lucene/util/packed/TestPackedInts.java

Modified: lucene/java/branches/flex_1458/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/CHANGES.txt?rev=927234&r1=927233&r2=927234&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/CHANGES.txt (original)
+++ lucene/java/branches/flex_1458/CHANGES.txt Thu Mar 25 00:03:16 2010
@@ -34,6 +34,10 @@ New features
   bounded, for example for storing the terms dict index (Toke
   Eskildsen via Mike McCandless)
 
+* LUCENE-2321: Cutover to a more RAM efficient packed-ints based
+  representation for the in-memory terms dict index.  (Mike
+  McCandless)
+
 ======================= Trunk (not yet released) =======================
 
 Changes in backwards compatibility policy

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/CheckIndex.java?rev=927234&r1=927233&r2=927234&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/CheckIndex.java Thu Mar 25 00:03:16 2010
@@ -125,6 +125,9 @@ public class CheckIndex {
       /** Name of the segment. */
       public String name;
 
+      /** Name of codec used to read this segment. */
+      public String codec;
+
       /** Document count (does not take deletions into account). */
       public int docCount;
 
@@ -418,6 +421,9 @@ public class CheckIndex {
       SegmentReader reader = null;
 
       try {
+        final String codec = info.getCodec().name;
+        msg("    codec=" + codec);
+        segInfoStat.codec = codec;
         msg("    compound=" + info.getUseCompoundFile());
         segInfoStat.compound = info.getUseCompoundFile();
         msg("    hasProx=" + info.getHasProx());
@@ -441,6 +447,7 @@ public class CheckIndex {
           msg("    docStoreIsCompoundFile=" + info.getDocStoreIsCompoundFile());
           segInfoStat.docStoreCompoundFile = info.getDocStoreIsCompoundFile();
         }
+
         final String delFileName = info.getDelFileName();
         if (delFileName == null){
           msg("    no deletions");

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/TermsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/TermsConsumer.java?rev=927234&r1=927233&r2=927234&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/TermsConsumer.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/TermsConsumer.java Thu Mar 25 00:03:16 2010
@@ -93,5 +93,7 @@ public abstract class TermsConsumer {
         }
       }
     }
+
+    finish();
   }
 }

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/PagedBytes.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/PagedBytes.java?rev=927234&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/PagedBytes.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/PagedBytes.java Thu Mar 25 00:03:16 2010
@@ -0,0 +1,129 @@
+package org.apache.lucene.index.codecs.standard;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.CloseableThreadLocal;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.store.IndexInput;
+
+import java.util.List;
+import java.util.ArrayList;
+import java.io.Closeable;
+import java.io.IOException;
+
+/** Represents a logical byte[] as a series of pages.  You
+ *  can write-once into the logical byte[], using copy, and
+ *  then retrieve slices (BytesRef) into it using fill. */
+class PagedBytes implements Closeable {
+  private final List<byte[]> blocks = new ArrayList<byte[]>();
+  private final int blockSize;
+  private final int blockBits;
+  private final int blockMask;
+  private int upto;
+  private byte[] currentBlock;
+  private final CloseableThreadLocal<byte[]> threadBuffers = new CloseableThreadLocal();
+
+  private static final byte[] EMPTY_BYTES = new byte[0];
+
+  /** 1<<blockBits must be bigger than the biggest single
+   *  BytesRef slice that will be pulled. */
+  public PagedBytes(int blockBits) {
+    this.blockSize = 1 << blockBits;
+    this.blockBits = blockBits;
+    blockMask = blockSize-1;
+    upto = blockSize;
+  }
+
+  /** Read this many bytes from in */
+  public void copy(IndexInput in, long byteCount) throws IOException {
+    while (byteCount > 0) {
+      int left = blockSize - upto;
+      if (left == 0) {
+        if (currentBlock != null) {
+          blocks.add(currentBlock);
+        }
+        currentBlock = new byte[blockSize];
+        upto = 0;
+        left = blockSize;
+      }
+      if (left < byteCount) {
+        in.readBytes(currentBlock, upto, left, false);
+        upto = blockSize;
+        byteCount -= left;
+      } else {
+        in.readBytes(currentBlock, upto, (int) byteCount, false);
+        upto += byteCount;
+        byteCount = 0;
+      }
+    }
+  }
+
+  /** Commits final byte[], trimming it if necessary. */
+  public void finish() {
+    if (upto < blockSize) {
+      final byte[] newBlock = new byte[upto];
+      System.arraycopy(currentBlock, 0, newBlock, 0, upto);
+      currentBlock = newBlock;
+    }
+    if (currentBlock == null) {
+      currentBlock = EMPTY_BYTES;
+    }
+    blocks.add(currentBlock);
+    currentBlock = null;
+  }
+
+  public long getPointer() {
+    if (currentBlock == null) {
+      return 0;
+    } else {
+      return (blocks.size() * ((long) blockSize)) + upto;
+    }
+  }
+
+  /** Get a slice out of the byte array. */
+  public void fill(BytesRef b, long start, int length) {
+    assert length >= 0: "length=" + length;
+    final int index = (int) (start >> blockBits);
+    final int offset = (int) (start & blockMask);
+    b.length = length;
+    if (blockSize - offset >= length) {
+      // Within block
+      b.bytes = blocks.get(index);
+      b.offset = offset;
+    } else {
+      // Split
+      byte[] buffer = threadBuffers.get();
+      if (buffer == null) {
+        buffer = new byte[length];
+        threadBuffers.set(buffer);
+      } else if (buffer.length < length) {
+        buffer = ArrayUtil.grow(buffer, length);
+        threadBuffers.set(buffer);
+      }
+      b.bytes = buffer;
+      b.offset = 0;
+      System.arraycopy(blocks.get(index), offset, buffer, 0, blockSize-offset);
+      System.arraycopy(blocks.get(1+index), 0, buffer, blockSize-offset, length-(blockSize-offset));
+    }
+  }
+
+  public void close() {
+    threadBuffers.close();
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/PagedBytes.java
------------------------------------------------------------------------------
    svn:eol-style = native
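
For context, a minimal usage sketch of the new PagedBytes class (not part of this commit).  PagedBytes is package-private, so this assumes a caller in the same package; "in", "byteCount" and "sliceLen" are hypothetical inputs, not names taken from the commit:

    package org.apache.lucene.index.codecs.standard;

    import java.io.IOException;

    import org.apache.lucene.store.IndexInput;
    import org.apache.lucene.util.BytesRef;

    // Hypothetical sketch only -- not part of this commit.
    class PagedBytesSketch {
      static void demo(IndexInput in, long byteCount, int sliceLen) throws IOException {
        PagedBytes bytes = new PagedBytes(15);    // pages of 1 << 15 = 32 KB
        long start = bytes.getPointer();          // logical address before the copy (0 here)
        bytes.copy(in, byteCount);                // write-once copy from the input
        bytes.finish();                           // trim and commit the final page

        BytesRef slice = new BytesRef();          // sliceLen must be <= byteCount and
        bytes.fill(slice, start, sliceLen);       // smaller than the page size; the slice
                                                  // points into a page, or into a per-thread
                                                  // buffer if it straddles two pages
        bytes.close();                            // releases the per-thread buffers
      }
    }

The page-based layout avoids allocating one huge byte[] up front (the comments removed below cite memory fragmentation on 32-bit JVMs as the reason for using blocks), and finish() trims the last page so a tiny segment does not pay for a full block.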

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java?rev=927234&r1=927233&r2=927234&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java Thu Mar 25 00:03:16 2010
@@ -28,6 +28,7 @@ import org.apache.lucene.index.codecs.Co
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.packed.PackedInts;
 
 import java.util.HashMap;
 import java.util.Iterator;
@@ -63,7 +64,7 @@ import org.apache.lucene.index.IndexFile
 /** @lucene.experimental */
 public class SimpleStandardTermsIndexReader extends StandardTermsIndexReader {
 
-  final private int totalIndexInterval;
+  private int totalIndexInterval;
   private int indexDivisor;
   final private int indexInterval;
 
@@ -72,6 +73,12 @@ public class SimpleStandardTermsIndexRea
   private volatile boolean indexLoaded;
 
   private final Comparator<BytesRef> termComp;
+  private final String segment;
+
+  private final static int PAGED_BYTES_BITS = 15;
+
+  // all fields share this single logical byte[]
+  private final PagedBytes termBytes = new PagedBytes(PAGED_BYTES_BITS);
 
   final HashMap<FieldInfo,FieldIndexReader> fields = new HashMap<FieldInfo,FieldIndexReader>();
 
@@ -80,6 +87,8 @@ public class SimpleStandardTermsIndexRea
 
     this.termComp = termComp;
 
+    this.segment = segment;
+
     IndexInput in = dir.openInput(IndexFileNames.segmentFileName(segment, StandardCodec.TERMS_INDEX_EXTENSION));
     
     boolean success = false;
@@ -118,10 +127,14 @@ public class SimpleStandardTermsIndexRea
           System.out.println("  read field number=" + field);
         }
         final int numIndexTerms = in.readInt();
+        final long termsStart = in.readLong();
         final long indexStart = in.readLong();
+        final long packedIndexStart = in.readLong();
+        final long packedOffsetsStart = in.readLong();
+        assert packedIndexStart >= indexStart: "packedStart=" + packedIndexStart + " indexStart=" + indexStart + " numIndexTerms=" + numIndexTerms + " seg=" + segment;
         if (numIndexTerms > 0) {
           final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
-          fields.put(fieldInfo, new FieldIndexReader(in, fieldInfo, numIndexTerms, indexStart));
+          fields.put(fieldInfo, new FieldIndexReader(in, fieldInfo, numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart));
         }
       }
       success = true;
@@ -130,57 +143,15 @@ public class SimpleStandardTermsIndexRea
         in.close();
         this.in = null;
         if (success) {
-          trimByteBlock();
           indexLoaded = true;
         }
+        termBytes.finish();
       } else {
         this.in = in;
       }
     }
   }
 
-  /* Called when index is fully loaded.  We know we will use
-   * no more bytes in the final byte[], so trim it down to
-   * its actual usagee.  This substantially reduces memory
-   * usage of SegmentReader searching a tiny segment. */
-  private final void trimByteBlock() {
-    if (blockOffset == 0) {
-      // There were no fields in this segment:
-      if (blocks != null) {
-        blocks[blockUpto] = null;
-      }
-    } else {
-      byte[] last = new byte[blockOffset];
-      System.arraycopy(blocks[blockUpto], 0, last, 0, blockOffset);
-      blocks[blockUpto] = last;
-    }
-  }
-
-  // TODO: we can record precisely how many bytes are
-  // required during indexing, save that into file, and be
-  // precise when we allocate the blocks; we even don't need
-  // to use blocks anymore (though we may still want to, to
-  // prevent allocation failure due to mem fragmentation on
-  // 32bit)
-
-  // Fixed size byte blocks, to hold all term bytes; these
-  // blocks are shared across fields
-  private byte[][] blocks;
-  int blockUpto;
-  int blockOffset;
-
-  private static final int BYTE_BLOCK_SHIFT = 15;
-  private static final int BYTE_BLOCK_SIZE = 1 << BYTE_BLOCK_SHIFT;
-  private static final int BYTE_BLOCK_MASK = BYTE_BLOCK_SIZE - 1;
-
-  static {
-    // Make sure DW can't ever write a term whose length
-    // cannot be encoded with short (because we use short[]
-    // to hold the length of each term).
-    assert IndexWriter.MAX_TERM_LENGTH < Short.MAX_VALUE;
-    assert BYTE_BLOCK_SIZE >= IndexWriter.MAX_TERM_LENGTH;
-  }
-
   private final class FieldIndexReader extends FieldReader {
 
     final private FieldInfo fieldInfo;
@@ -190,14 +161,21 @@ public class SimpleStandardTermsIndexRea
     private final IndexInput in;
 
     private final long indexStart;
+    private final long termsStart;
+    private final long packedIndexStart;
+    private final long packedOffsetsStart;
 
     private final int numIndexTerms;
 
-    public FieldIndexReader(IndexInput in, FieldInfo fieldInfo, int numIndexTerms, long indexStart) throws IOException {
+    public FieldIndexReader(IndexInput in, FieldInfo fieldInfo, int numIndexTerms, long indexStart, long termsStart, long packedIndexStart,
+                            long packedOffsetsStart) throws IOException {
 
       this.fieldInfo = fieldInfo;
       this.in = in;
+      this.termsStart = termsStart;
       this.indexStart = indexStart;
+      this.packedIndexStart = packedIndexStart;
+      this.packedOffsetsStart = packedOffsetsStart;
       this.numIndexTerms = numIndexTerms;
 
       // We still create the indexReader when indexDivisor
@@ -210,6 +188,9 @@ public class SimpleStandardTermsIndexRea
         }
 
         coreIndex = new CoreFieldIndex(indexStart,
+                                       termsStart,
+                                       packedIndexStart,
+                                       packedOffsetsStart,
                                        numIndexTerms);
       
       } else {
@@ -221,7 +202,7 @@ public class SimpleStandardTermsIndexRea
 
     public void loadTermsIndex() throws IOException {
       if (coreIndex == null) {
-        coreIndex = new CoreFieldIndex(indexStart, numIndexTerms);
+        coreIndex = new CoreFieldIndex(indexStart, termsStart, packedIndexStart, packedOffsetsStart, numIndexTerms);
       }
     }
 
@@ -263,150 +244,115 @@ public class SimpleStandardTermsIndexRea
 
     private final class CoreFieldIndex {
 
-      // TODO: use packed ints here
-      // Pointer into terms dict file that we are indexing
-      final long[] fileOffset;
-
-      // TODO: use packed ints here
-      // For each term, points to start of term's bytes within
-      // block.
-      // TODO: wasteful that this is always long; many terms
-      // dict indexes obviously don't require so much address
-      // space; since we know up front during indexing how
-      // much space is needed we could pack this to the
-      // precise # bits
-      final long[] blockPointer;
-    
-      // TODO: use packed ints here: we know max term
-      // length; often it's small
+      final private long termBytesStart;
 
-      // TODO: can we inline this w/ the bytes?  like
-      // DW.  vast majority of terms only need 1 byte, not 2
+      // offset into index termBytes
+      final PackedInts.Reader termOffsets;
 
-      // Length of each term
-      final short[] termLength;
+      // index pointers into main terms dict
+      final PackedInts.Reader termsDictOffsets;
 
       final int numIndexTerms;
 
-      CoreFieldIndex(long indexStart, int numIndexTerms) throws IOException {
+      final long termsStart;
+
+      public CoreFieldIndex(long indexStart, long termsStart, long packedIndexStart, long packedOffsetsStart, int numIndexTerms) throws IOException {
+
+        this.termsStart = termsStart;
+        termBytesStart = termBytes.getPointer();
 
         IndexInput clone = (IndexInput) in.clone();
         clone.seek(indexStart);
 
-        if (indexDivisor == -1) {
-          // Special case: we are being loaded inside
-          // IndexWriter because a SegmentReader that at
-          // first was opened for merging, is now being
-          // opened to perform deletes or for an NRT reader
-          this.numIndexTerms = numIndexTerms;
-        } else {
-          this.numIndexTerms = 1+(numIndexTerms-1) / indexDivisor;
-        }
+        // -1 is passed to mean "don't load the term index", but
+        // if the index is later loaded this is overwritten
+        // with a real value
+        assert indexDivisor > 0;
+
+        this.numIndexTerms = 1+(numIndexTerms-1) / indexDivisor;
 
         assert this.numIndexTerms  > 0: "numIndexTerms=" + numIndexTerms + " indexDivisor=" + indexDivisor;
 
-        if (blocks == null) {
-          blocks = new byte[1][];
-          blocks[0] = new byte[BYTE_BLOCK_SIZE];
-        }
-
-        byte[] lastBlock = blocks[blockUpto];
-        int lastBlockOffset = blockOffset;
-
-        fileOffset = new long[this.numIndexTerms];
-        blockPointer = new long[this.numIndexTerms];
-        termLength = new short[this.numIndexTerms];
-        
-        final byte[] skipBytes;
-        if (indexDivisor != 1) {
-          // only need skipBytes (below) if we are not
-          // loading all index terms
-          skipBytes = new byte[128];
+        if (indexDivisor == 1) {
+          // Default (load all index terms) is fast -- slurp in the images from disk:
+          
+          try {
+            final long numTermBytes = packedIndexStart - indexStart;
+            termBytes.copy(clone, numTermBytes);
+
+            // records offsets into main terms dict file
+            termsDictOffsets = PackedInts.getReader(clone);
+            assert termsDictOffsets.size() == numIndexTerms;
+
+            // records offsets into byte[] term data
+            termOffsets = PackedInts.getReader(clone);
+            assert termOffsets.size() == 1+numIndexTerms;
+          } finally {
+            clone.close();
+          }
         } else {
-          skipBytes = null;
-        }
+          // Get packed iterators
+          final IndexInput clone1 = (IndexInput) in.clone();
+          final IndexInput clone2 = (IndexInput) in.clone();
 
-        int upto = 0;
-        long pointer = 0;
-      
-        for(int i=0;i<numIndexTerms;i++) {
-          final int start = clone.readVInt();
-          final int suffix = clone.readVInt();
-          final int thisTermLength = start + suffix;
-
-          assert thisTermLength <= BYTE_BLOCK_SIZE;
-
-          if (i%indexDivisor == 0) {
-            // Keeper
-            if (blockOffset + thisTermLength > BYTE_BLOCK_SIZE) {
-              // New block
-              final byte[] newBlock = new byte[BYTE_BLOCK_SIZE];
-              if (blocks.length == blockUpto+1) {
-                final int newSize = ArrayUtil.oversize(blockUpto+2, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
-                final byte[][] newBlocks = new byte[newSize][];
-                System.arraycopy(blocks, 0, newBlocks, 0, blocks.length);
-                blocks = newBlocks;
-              }
-              blockUpto++;
-              blocks[blockUpto] = newBlock;
-              blockOffset = 0;
-            }
+          try {
+            // Subsample the index terms
+            clone1.seek(packedIndexStart);
+            final PackedInts.ReaderIterator termsDictOffsetsIter = PackedInts.getReaderIterator(clone1);
 
-            final byte[] block = blocks[blockUpto];
+            clone2.seek(packedOffsetsStart);
+            final PackedInts.ReaderIterator termOffsetsIter = PackedInts.getReaderIterator(clone2);
 
-            // Copy old prefix
-            assert lastBlock != null || start == 0;
-            assert block != null;
-            System.arraycopy(lastBlock, lastBlockOffset, block, blockOffset, start);
-
-            // Read new suffix
-            clone.readBytes(block, blockOffset+start, suffix);
-
-            // Advance file offset
-            pointer += clone.readVLong();
-
-            assert thisTermLength < Short.MAX_VALUE;
-
-            termLength[upto] = (short) thisTermLength;
-            fileOffset[upto] = pointer;
-            blockPointer[upto] = blockUpto * BYTE_BLOCK_SIZE + blockOffset;
-
-            /*
-            BytesRef tr = new BytesRef();
-            tr.bytes = blocks[blockUpto];
-            tr.offset = blockOffset;
-            tr.length = thisTermLength;
-
-            //System.out.println("    read index term=" + new String(blocks[blockUpto], blockOffset, thisTermLength, "UTF-8") + " this=" + this + " bytes=" + block + " (vs=" + blocks[blockUpto] + ") offset=" + blockOffset);
-            //System.out.println("    read index term=" + tr.toBytesString() + " this=" + this + " bytes=" + block + " (vs=" + blocks[blockUpto] + ") offset=" + blockOffset);
-            */
-
-            lastBlock = block;
-            lastBlockOffset = blockOffset;
-            blockOffset += thisTermLength;
-            upto++;
-          } else {
-            // Skip bytes
-            int toSkip = suffix;
-            while(true) {
-              if (toSkip > skipBytes.length) {
-                clone.readBytes(skipBytes, 0, skipBytes.length);
-                toSkip -= skipBytes.length;
-              } else {
-                clone.readBytes(skipBytes, 0, toSkip);
-                break;
+            // TODO: often we can get by w/ fewer bits per
+            // value, below... but this'd be more complex:
+            // we'd have to try @ fewer bits and then grow
+            // if we overflowed it.
+
+            PackedInts.Mutable termsDictOffsetsM = PackedInts.getMutable(this.numIndexTerms, termsDictOffsetsIter.getBitsPerValue());
+            PackedInts.Mutable termOffsetsM = PackedInts.getMutable(this.numIndexTerms+1, termOffsetsIter.getBitsPerValue());
+
+            termsDictOffsets = termsDictOffsetsM;
+            termOffsets = termOffsetsM;
+
+            int upto = 0;
+
+            long lastTermOffset = 0;
+            long termOffsetUpto = 0;
+
+            while(upto < this.numIndexTerms) {
+              // main file offset copies straight over
+              termsDictOffsetsM.set(upto, termsDictOffsetsIter.next());
+
+              termOffsetsM.set(upto, termOffsetUpto);
+              upto++;
+
+              long termOffset = termOffsetsIter.next();
+              long nextTermOffset = termOffsetsIter.next();
+              final int numTermBytes = (int) (nextTermOffset - termOffset);
+
+              clone.seek(indexStart + termOffset);
+              assert indexStart + termOffset < clone.length() : "indexStart=" + indexStart + " termOffset=" + termOffset + " len=" + clone.length();
+              assert indexStart + termOffset + numTermBytes < clone.length();
+
+              termBytes.copy(clone, numTermBytes);
+              termOffsetUpto += numTermBytes;
+
+              // skip terms:
+              termsDictOffsetsIter.next();
+              for(int i=0;i<indexDivisor-2;i++) {
+                termOffsetsIter.next();
+                termsDictOffsetsIter.next();
               }
             }
+            termOffsetsM.set(upto, termOffsetUpto);
 
-            // Advance file offset
-            pointer += clone.readVLong();
+          } finally {
+            clone1.close();
+            clone2.close();
+            clone.close();
           }
         }
 
-        clone.close();
-
-        assert upto == this.numIndexTerms;
-
         if (Codec.DEBUG) {
           System.out.println("  done read");
         }
@@ -423,30 +369,28 @@ public class SimpleStandardTermsIndexRea
       }
 
       private final void fillResult(int idx, TermsIndexResult result) {
-        final long loc = blockPointer[idx];
-        result.term.bytes = blocks[(int) (loc >> BYTE_BLOCK_SHIFT)];
-        result.term.offset = (int) (loc & BYTE_BLOCK_MASK);
-        result.term.length = termLength[idx];
+        final long offset = termOffsets.get(idx);
+        final int length = (int) (termOffsets.get(1+idx) - offset);
+        termBytes.fill(result.term, termBytesStart + offset, length);
         result.position = idx * totalIndexInterval;
-        result.offset = fileOffset[idx];
+        result.offset = termsStart + termsDictOffsets.get(idx);
       }
 
       public final void getIndexOffset(BytesRef term, TermsIndexResult result) throws IOException {
 
         if (Codec.DEBUG) {
-          System.out.println("getIndexOffset field=" + fieldInfo.name + " term=" + term + " indexLen = " + blockPointer.length + " numIndexTerms=" + fileOffset.length + " numIndexedTerms=" + fileOffset.length);
+          System.out.println("getIndexOffset field=" + fieldInfo.name + " term=" + term.utf8ToString());
         }
 
         int lo = 0;					  // binary search
-        int hi = fileOffset.length - 1;
+        int hi = numIndexTerms - 1;
 
         while (hi >= lo) {
           int mid = (lo + hi) >>> 1;
 
-          final long loc = blockPointer[mid];
-          result.term.bytes = blocks[(int) (loc >> BYTE_BLOCK_SHIFT)];
-          result.term.offset = (int) (loc & BYTE_BLOCK_MASK);
-          result.term.length = termLength[mid];
+          final long offset = termOffsets.get(mid);
+          final int length = (int) (termOffsets.get(1+mid) - offset);
+          termBytes.fill(result.term, termBytesStart + offset, length);
 
           int delta = termComp.compare(term, result.term);
           if (delta < 0) {
@@ -456,7 +400,7 @@ public class SimpleStandardTermsIndexRea
           } else {
             assert mid >= 0;
             result.position = mid*totalIndexInterval;
-            result.offset = fileOffset[mid];
+            result.offset = termsStart + termsDictOffsets.get(mid);
             return;
           }
         }
@@ -465,13 +409,12 @@ public class SimpleStandardTermsIndexRea
           hi = 0;
         }
 
-        final long loc = blockPointer[hi];
-        result.term.bytes = blocks[(int) (loc >> BYTE_BLOCK_SHIFT)];
-        result.term.offset = (int) (loc & BYTE_BLOCK_MASK);
-        result.term.length = termLength[hi];
+        final long offset = termOffsets.get(hi);
+        final int length = (int) (termOffsets.get(1+hi) - offset);
+        termBytes.fill(result.term, termBytesStart + offset, length);
 
         result.position = hi*totalIndexInterval;
-        result.offset = fileOffset[hi];
+        result.offset = termsStart + termsDictOffsets.get(hi);
       }
 
       public final void getIndexOffset(long ord, TermsIndexResult result) throws IOException {
@@ -488,6 +431,7 @@ public class SimpleStandardTermsIndexRea
     if (!indexLoaded) {
 
       this.indexDivisor = indexDivisor;
+      this.totalIndexInterval = indexInterval * indexDivisor;
 
       // mxx
       if (Codec.DEBUG) {
@@ -498,10 +442,10 @@ public class SimpleStandardTermsIndexRea
       while(it.hasNext()) {
         it.next().loadTermsIndex();
       }
-      trimByteBlock();
 
       indexLoaded = true;
       in.close();
+      termBytes.finish();
     }
   }
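
To see where the RAM saving comes from, compare the per-index-term cost.  Before this change each index term cost a long fileOffset (8 bytes), a long blockPointer (8 bytes) and a short termLength (2 bytes), i.e. 18 bytes on top of the term bytes themselves.  After it, the same information is one entry in each of two packed arrays: bitsRequired(terms dict size) bits for the dict offset and bitsRequired(total index term bytes) bits for the term offset.  With illustrative numbers that are not from this commit -- a terms dict under 256 MB and under 16 MB of index term bytes -- that is roughly 28 + 24 = 52 bits, about 6.5 bytes per index term instead of 18.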
 

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java?rev=927234&r1=927233&r2=927234&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java Thu Mar 25 00:03:16 2010
@@ -25,6 +25,8 @@ import org.apache.lucene.index.SegmentWr
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.packed.PackedInts;
 
 import java.util.List;
 import java.util.ArrayList;
@@ -58,7 +60,6 @@ public class SimpleStandardTermsIndexWri
     // Placeholder for dir offset
     out.writeLong(0);
     out.writeInt(termIndexInterval);
-    termWriter = new DeltaBytesWriter(out);
   }
 
   @Override
@@ -66,8 +67,6 @@ public class SimpleStandardTermsIndexWri
     this.termsOut = termsOut;
   }
   
-  final private DeltaBytesWriter termWriter;
-
   @Override
   public FieldWriter addField(FieldInfo field) {
     SimpleFieldWriter writer = new SimpleFieldWriter(field);
@@ -78,33 +77,99 @@ public class SimpleStandardTermsIndexWri
   private class SimpleFieldWriter extends FieldWriter {
     final FieldInfo fieldInfo;
     int numIndexTerms;
-    private long lastTermsPointer;
     final long indexStart;
+    final long termsStart;
+    long packedIndexStart;
+    long packedOffsetsStart;
     private int numTerms;
 
+    // TODO: we could conceivably make a PackedInts wrapper
+    // that auto-grows... then we wouldn't force 6 bytes RAM
+    // per index term:
+    private short[] termLengths;
+    private int[] termsPointerDeltas;
+    private long lastTermsPointer;
+    private long totTermLength;
+
     SimpleFieldWriter(FieldInfo fieldInfo) {
       this.fieldInfo = fieldInfo;
       indexStart = out.getFilePointer();
-      termWriter.reset();
+      termsStart = lastTermsPointer = termsOut.getFilePointer();
+      termLengths = new short[0];
+      termsPointerDeltas = new int[0];
     }
 
     @Override
     public boolean checkIndexTerm(BytesRef text, int docFreq) throws IOException {
       // First term is first indexed term:
       if (0 == (numTerms++ % termIndexInterval)) {
-        final long termsPointer = termsOut.getFilePointer();
+
         if (Codec.DEBUG) {
-          Codec.debug("sstiw.checkIndexTerm write index field=" + fieldInfo.name + " term=" + text + " termsFP=" + termsPointer + " numIndexTerms=" + numIndexTerms + " outFP=" + out.getFilePointer());
+          Codec.debug("sstiw.checkIndexTerm write index field=" + fieldInfo.name + " term=" + text.utf8ToString() + " numIndexTerms=" + numIndexTerms + " outFP=" + out.getFilePointer());
+        }
+
+        // write full bytes
+        out.writeBytes(text.bytes, text.offset, text.length);
+
+        if (termLengths.length == numIndexTerms) {
+          termLengths = ArrayUtil.grow(termLengths);
+        }
+        if (termsPointerDeltas.length == numIndexTerms) {
+          termsPointerDeltas = ArrayUtil.grow(termsPointerDeltas);
         }
-        termWriter.write(text);
-        out.writeVLong(termsPointer - lastTermsPointer);
-        lastTermsPointer = termsPointer;
+
+        // save delta terms pointer
+        final long fp = termsOut.getFilePointer();
+        termsPointerDeltas[numIndexTerms] = (int) (fp - lastTermsPointer);
+        lastTermsPointer = fp;
+
+        // save term length (in bytes)
+        assert text.length <= Short.MAX_VALUE;
+        termLengths[numIndexTerms] = (short) text.length;
+
+        totTermLength += text.length;
+
         numIndexTerms++;
         return true;
       } else {
         return false;
       }
     }
+
+    @Override
+    public void finish() throws IOException {
+
+      // write primary terms dict offsets
+      packedIndexStart = out.getFilePointer();
+
+      final long maxValue = termsOut.getFilePointer();
+      PackedInts.Writer w = PackedInts.getWriter(out, numIndexTerms, PackedInts.bitsRequired(maxValue));
+
+      // offsets are relative to termsStart
+      long upto = 0;
+      for(int i=0;i<numIndexTerms;i++) {
+        upto += termsPointerDeltas[i];
+        w.add(upto);
+      }
+      w.finish();
+
+      packedOffsetsStart = out.getFilePointer();
+
+      // write offsets into the byte[] terms
+      w = PackedInts.getWriter(out, 1+numIndexTerms, PackedInts.bitsRequired(totTermLength));
+      upto = 0;
+      for(int i=0;i<numIndexTerms;i++) {
+        w.add(upto);
+        upto += termLengths[i];
+      }
+      w.add(upto);
+      w.finish();
+
+      // our referrer holds onto us, while other fields are
+      // being written, so don't tie up this RAM:
+      termLengths = null;
+      termsPointerDeltas = null;
+    }
   }
 
   @Override
@@ -123,7 +188,10 @@ public class SimpleStandardTermsIndexWri
       }
       out.writeInt(field.fieldInfo.number);
       out.writeInt(field.numIndexTerms);
+      out.writeLong(field.termsStart);
       out.writeLong(field.indexStart);
+      out.writeLong(field.packedIndexStart);
+      out.writeLong(field.packedOffsetsStart);
     }
     out.seek(CodecUtil.headerLength(CODEC_NAME));
     out.writeLong(dirStart);
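
Putting the writer above and the reader earlier in this commit together, the per-field data in the terms index file is now laid out roughly as follows (a reconstruction from this diff; the labels are descriptive, not names used in the source):

    indexStart         : the raw bytes of every termIndexInterval'th term,
                         written back to back with no per-term length prefix
    packedIndexStart   : a PackedInts stream of numIndexTerms offsets (relative
                         to termsStart) into the main terms dict file
    packedOffsetsStart : a PackedInts stream of 1+numIndexTerms offsets into the
                         term bytes above; entries i and i+1 bracket index term
                         i, so term lengths are implicit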

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java?rev=927234&r1=927233&r2=927234&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java Thu Mar 25 00:03:16 2010
@@ -372,7 +372,7 @@ public class StandardTermsDictReader ext
           indexReader.getIndexOffset(term, indexResult);
 
           if (Codec.DEBUG) {
-            Codec.debug(" index pos=" + indexResult.position + " termFP=" + indexResult.offset + " term=" + indexResult.term + " this=" + this);
+            Codec.debug(" index pos=" + indexResult.position + " termFP=" + indexResult.offset + " term=" + indexResult.term.utf8ToString() + " this=" + this);
           }
 
           in.seek(indexResult.offset);
@@ -507,6 +507,9 @@ public class StandardTermsDictReader ext
         }
         
         if (state.ord >= numTerms-1) {
+          if (Codec.DEBUG) {
+            Codec.debug("  return null ord=" + state.ord + " vs numTerms-1=" + (numTerms-1));
+          }
           return null;
         }
 
@@ -514,7 +517,7 @@ public class StandardTermsDictReader ext
         state.docFreq = in.readVInt();
 
         if (Codec.DEBUG) {
-          Codec.debug("  text=" + bytesReader.term + " freq=" + state.docFreq + " tis=" + in);
+          Codec.debug("  text=" + bytesReader.term.utf8ToString() + " freq=" + state.docFreq + " tis=" + in);
         }
 
         // TODO: would be cleaner, but space-wasting, to

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java?rev=927234&r1=927233&r2=927234&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java Thu Mar 25 00:03:16 2010
@@ -205,7 +205,8 @@ public class StandardTermsDictWriter ext
 
     // Finishes all terms in this field
     @Override
-    public void finish() {
+    public void finish() throws IOException {
+      fieldIndexWriter.finish();
     }
   }
-}
\ No newline at end of file
+}

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsIndexWriter.java?rev=927234&r1=927233&r2=927234&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsIndexWriter.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsIndexWriter.java Thu Mar 25 00:03:16 2010
@@ -29,6 +29,7 @@ public abstract class StandardTermsIndex
 
   public abstract class FieldWriter {
     public abstract boolean checkIndexTerm(BytesRef text, int docFreq) throws IOException;
+    public abstract void finish() throws IOException;
   }
 
   public abstract FieldWriter addField(FieldInfo fieldInfo);

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/ArrayUtil.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/ArrayUtil.java?rev=927234&r1=927233&r2=927234&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/ArrayUtil.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/ArrayUtil.java Thu Mar 25 00:03:16 2010
@@ -232,6 +232,29 @@ public final class ArrayUtil {
       return currentSize;
   }
 
+  public static short[] grow(short[] array, int minSize) {
+    if (array.length < minSize) {
+      short[] newArray = new short[oversize(minSize, RamUsageEstimator.NUM_BYTES_SHORT)];
+      System.arraycopy(array, 0, newArray, 0, array.length);
+      return newArray;
+    } else
+      return array;
+  }
+
+  public static short[] grow(short[] array) {
+    return grow(array, 1 + array.length);
+  }
+
+  public static short[] shrink(short[] array, int targetSize) {
+    final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_SHORT);
+    if (newSize != array.length) {
+      short[] newArray = new short[newSize];
+      System.arraycopy(array, 0, newArray, 0, newSize);
+      return newArray;
+    } else
+      return array;
+  }
+
   public static int[] grow(int[] array, int minSize) {
     if (array.length < minSize) {
       int[] newArray = new int[oversize(minSize, RamUsageEstimator.NUM_BYTES_INT)];

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/Packed32.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/Packed32.java?rev=927234&r1=927233&r2=927234&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/Packed32.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/Packed32.java Thu Mar 25 00:03:16 2010
@@ -129,6 +129,7 @@ class Packed32 extends PackedInts.Reader
     super(valueCount, bitsPerValue);
     int size = size(bitsPerValue, valueCount);
     blocks = new int[size + 1]; // +1 due to non-conditional tricks
+    // TODO: find a faster way to bulk-read ints...
     for(int i = 0 ; i < size ; i++) {
       blocks[i] = in.readInt();
     }

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/Packed64.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/Packed64.java?rev=927234&r1=927233&r2=927234&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/Packed64.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/Packed64.java Thu Mar 25 00:03:16 2010
@@ -148,6 +148,7 @@ class Packed64 extends PackedInts.Reader
     super(valueCount, bitsPerValue);
     int size = size(valueCount, bitsPerValue);
     blocks = new long[size+1]; // +1 due to non-conditional tricks
+    // TODO: find a faster way to bulk-read longs...
     for(int i=0;i<size;i++) {
       blocks[i] = in.readLong();
     }

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/PackedInts.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/PackedInts.java?rev=927234&r1=927233&r2=927234&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/PackedInts.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/PackedInts.java Thu Mar 25 00:03:16 2010
@@ -17,6 +17,8 @@ package org.apache.lucene.util.packed;
  * limitations under the License.
  */
 
+import java.io.Closeable;
+
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.CodecUtil;
@@ -65,6 +67,18 @@ public class PackedInts {
   }
 
   /**
+   * Run-once iterator interface, to decode previously saved PackedInts.
+   */
+  public static interface ReaderIterator extends Closeable {
+    /** Returns next value */
+    long next() throws IOException;
+    /** Returns number of bits per value */
+    int getBitsPerValue();
+    /** Returns number of values */
+    int size();
+  }
+
+  /**
    * A packed integer array that can be modified.
    * @lucene.internal
    */
@@ -167,6 +181,22 @@ public class PackedInts {
   }
 
   /**
+   * Retrieve PackedInts as a {@link ReaderIterator}
+   * @param in positioned at the beginning of a stored packed int structure.
+   * @return an iterator to access the values
+   * @throws IOException if the structure could not be retrieved.
+   * @lucene.internal
+   */
+  public static ReaderIterator getReaderIterator(IndexInput in) throws IOException {
+    CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START);
+    final int bitsPerValue = in.readVInt();
+    assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
+    final int valueCount = in.readVInt();
+
+    return new PackedReaderIterator(bitsPerValue, valueCount, in);
+  }
+
+  /**
    * Create a packed integer array with the given amount of values initialized
    * to 0. The valueCount and the bitsPerValue cannot be changed after creation.
    * All Mutables known by this factory are kept fully in RAM.
@@ -228,7 +258,7 @@ public class PackedInts {
     } if (maxValue > 0x1FFFFFFFFFFFFFFFL) {
       return 62;
     }
-    return (int) Math.ceil(Math.log(1+maxValue)/Math.log(2.0));
+    return Math.max(1, (int) Math.ceil(Math.log(1+maxValue)/Math.log(2.0)));
   }
 
   /**
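
The new ReaderIterator is a forward-only, streaming counterpart to PackedInts.Reader.  A minimal round-trip sketch follows; it is a hypothetical helper (any Directory implementation and file name will do), and the writer-side API shown already existed before this commit:

    import java.io.IOException;

    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.IndexInput;
    import org.apache.lucene.store.IndexOutput;
    import org.apache.lucene.util.packed.PackedInts;

    class PackedRoundTripSketch {
      // Writes 100 small values, then streams them back without holding a
      // full PackedInts.Reader in RAM.
      static void demo(Directory dir) throws IOException {
        IndexOutput out = dir.createOutput("packed.bin");
        PackedInts.Writer w = PackedInts.getWriter(out, 100, PackedInts.bitsRequired(9999));
        for (int i = 0; i < 100; i++) {
          w.add(i);                // each value must fit in bitsRequired(9999) bits
        }
        w.finish();
        out.close();

        IndexInput in = dir.openInput("packed.bin");
        PackedInts.ReaderIterator it = PackedInts.getReaderIterator(in);
        for (int i = 0; i < it.size(); i++) {
          long v = it.next();      // forward-only, single pass
        }
        it.close();                // also closes the underlying IndexInput
      }
    }

SimpleStandardTermsIndexReader uses this pattern when indexDivisor > 1: it streams the on-disk packed structures once and re-packs only every indexDivisor'th entry into a PackedInts.Mutable.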

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java?rev=927234&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java Thu Mar 25 00:03:16 2010
@@ -0,0 +1,84 @@
+package org.apache.lucene.util.packed;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.IndexInput;
+
+import java.io.IOException;
+
+class PackedReaderIterator implements PackedInts.ReaderIterator {
+  private long pending;
+  private int pendingBitsLeft;
+  private final IndexInput in;
+  private final int bitsPerValue;
+  private final int valueCount;
+
+  // masks[n-1] masks for bottom n bits
+  private final long[] masks;
+
+  public PackedReaderIterator(int bitsPerValue, int valueCount, IndexInput in)
+    throws IOException {
+
+    this.valueCount = valueCount;
+    this.bitsPerValue = bitsPerValue;
+    
+    this.in = in;
+
+    masks = new long[bitsPerValue];
+
+    long v = 1;
+    for (int i = 0; i < bitsPerValue; i++) {
+      v *= 2;
+      masks[i] = v - 1;
+    }
+  }
+
+  public int getBitsPerValue() {
+    return bitsPerValue;
+  }
+
+  public int size() {
+    return valueCount;
+  }
+
+  public long next() throws IOException {
+    if (pendingBitsLeft == 0) {
+      pending = in.readLong();
+      pendingBitsLeft = 64;
+    }
+
+    if (pendingBitsLeft >= bitsPerValue) {
+      // not split
+      final long result = (pending >> (pendingBitsLeft - bitsPerValue)) & masks[bitsPerValue-1];
+      pendingBitsLeft -= bitsPerValue;
+      return result;
+    } else {
+      // split
+      final int bits1 = bitsPerValue - pendingBitsLeft;
+      final long result1 = (pending & masks[pendingBitsLeft-1]) << bits1;
+      pending = in.readLong();
+      final long result2 = (pending >> (64 - bits1)) & masks[bits1-1];
+      pendingBitsLeft = 64 + pendingBitsLeft - bitsPerValue;
+      return result1 | result2;
+    }
+  }
+
+  public void close() throws IOException {
+    in.close();
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java
------------------------------------------------------------------------------
    svn:eol-style = native
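
To make the split branch in next() concrete, a worked example with bitsPerValue = 7 (an illustrative value, not one used by this commit): values 0..8 occupy the top 63 bits of the first long, leaving pendingBitsLeft = 1, so value 9 straddles two longs:

    bits1   = 7 - 1 = 6                          // bits still missing
    result1 = (pending & masks[0]) << 6          // bottom 1 bit of long #0 becomes
                                                 // the top bit of the value
    pending = in.readLong()                      // load long #1
    result2 = (pending >> (64 - 6)) & masks[5]   // top 6 bits of long #1
    value   = result1 | result2
    pendingBitsLeft = 64 + 1 - 7 = 58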

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/PackedWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/PackedWriter.java?rev=927234&r1=927233&r2=927234&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/PackedWriter.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/packed/PackedWriter.java Thu Mar 25 00:03:16 2010
@@ -45,7 +45,7 @@ class PackedWriter extends PackedInts.Wr
     pendingBitPos = 64;
     masks = new long[bitsPerValue - 1];
 
-    int v = 1;
+    long v = 1;
     for (int i = 0; i < bitsPerValue - 1; i++) {
       v *= 2;
       masks[i] = v - 1;
@@ -104,7 +104,6 @@ class PackedWriter extends PackedInts.Wr
     if (pendingBitPos != 64) {
       out.writeLong(pending);
     }
-    out.writeLong(0L); // Dummy to compensate for not using conditionals
   }
 
   public String toString() {
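
Two fixes here, judging from the surrounding code.  The int-to-long change matters because the constructor doubles v once per mask entry; with an int the doubling wraps past 2^31, so the masks for values wider than about 32 bits would be computed incorrectly.  Dropping the trailing dummy long pairs with the getFilePointer() assertions added to TestPackedInts below: a packed stream now ends exactly where its data ends, which is what lets SimpleStandardTermsIndexReader read its two packed structures back to back from the same input without re-seeking.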

Modified: lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestStressIndexing2.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestStressIndexing2.java?rev=927234&r1=927233&r2=927234&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestStressIndexing2.java (original)
+++ lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestStressIndexing2.java Thu Mar 25 00:03:16 2010
@@ -296,7 +296,13 @@ public class TestStressIndexing2 extends
       assertNull(MultiFields.getFields(r2));
       return;
     }
-    final TermsEnum termsEnum = f1.terms(idField).iterator();
+    final Terms terms1 = f1.terms(idField);
+    if (terms1 == null) {
+      assertTrue(MultiFields.getFields(r2) == null ||
+                 MultiFields.getFields(r2).terms(idField) == null);
+      return;
+    }
+    final TermsEnum termsEnum = terms1.iterator();
 
     final Bits delDocs1 = MultiFields.getDeletedDocs(r1);
     final Bits delDocs2 = MultiFields.getDeletedDocs(r2);

Modified: lucene/java/branches/flex_1458/src/test/org/apache/lucene/util/packed/TestPackedInts.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/test/org/apache/lucene/util/packed/TestPackedInts.java?rev=927234&r1=927233&r2=927234&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/test/org/apache/lucene/util/packed/TestPackedInts.java (original)
+++ lucene/java/branches/flex_1458/src/test/org/apache/lucene/util/packed/TestPackedInts.java Thu Mar 25 00:03:16 2010
@@ -71,16 +71,28 @@ public class TestPackedInts extends Luce
           w.add(values[i]);
         }
         w.finish();
+        final long fp = out.getFilePointer();
         out.close();
 
         IndexInput in = d.openInput("out.bin");
         PackedInts.Reader r = PackedInts.getReader(in);
+        assertEquals(fp, in.getFilePointer());
         for(int i=0;i<valueCount;i++) {
           assertEquals("index=" + i + " ceil=" + ceil + " valueCount="
                   + valueCount + " nbits=" + nbits + " for "
                   + r.getClass().getSimpleName(), values[i], r.get(i));
         }
         in.close();
+
+        in = d.openInput("out.bin");
+        PackedInts.ReaderIterator r2 = PackedInts.getReaderIterator(in);
+        for(int i=0;i<valueCount;i++) {
+          assertEquals("index=" + i + " ceil=" + ceil + " valueCount="
+                  + valueCount + " nbits=" + nbits + " for "
+                  + r.getClass().getSimpleName(), values[i], r2.next());
+        }
+        assertEquals(fp, in.getFilePointer());
+        in.close();
         ceil *= 2;
       }
     }
@@ -193,4 +205,21 @@ public class TestPackedInts extends Luce
       }
     }
   }
+
+  public void testSingleValue() throws Exception {
+    Directory dir = new MockRAMDirectory();
+    IndexOutput out = dir.createOutput("out");
+    PackedInts.Writer w = PackedInts.getWriter(out, 1, 8);
+    w.add(17);
+    w.finish();
+    final long end = out.getFilePointer();
+    out.close();
+
+    IndexInput in = dir.openInput("out");
+    PackedInts.Reader r = PackedInts.getReader(in);
+    assertEquals(end, in.getFilePointer());
+    in.close();
+
+    dir.close();
+  }
 }


