lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mhern...@apache.org
Subject [02/50] [abbrv] git commit: another set of files
Date Tue, 24 Sep 2013 18:32:38 GMT
another set of files


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/64c13f3c
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/64c13f3c
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/64c13f3c

Branch: refs/heads/branch_4x
Commit: 64c13f3c5cf452252c0e45ff88860ebca5da0174
Parents: 666d83e
Author: Mike Potts <mike@feature23.com>
Authored: Wed Jul 10 23:15:58 2013 -0400
Committer: Mike Potts <mike@feature23.com>
Committed: Wed Jul 10 23:15:58 2013 -0400

----------------------------------------------------------------------
 .../CompressingStoredFieldsIndexReader.cs       | 175 +++++
 .../CompressingStoredFieldsReader.cs            |   2 +-
 .../CompressingStoredFieldsWriter.cs            | 756 +++++++++----------
 src/core/Codecs/Compressing/Compressor.cs       |   2 +-
 .../Compressing/GrowableByteArrayDataOutput.cs  |  34 +-
 src/core/Lucene.Net.csproj                      |   2 +
 6 files changed, 575 insertions(+), 396 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/64c13f3c/src/core/Codecs/Compressing/CompressingStoredFieldsIndexReader.cs
----------------------------------------------------------------------
diff --git a/src/core/Codecs/Compressing/CompressingStoredFieldsIndexReader.cs b/src/core/Codecs/Compressing/CompressingStoredFieldsIndexReader.cs
new file mode 100644
index 0000000..f981b32
--- /dev/null
+++ b/src/core/Codecs/Compressing/CompressingStoredFieldsIndexReader.cs
@@ -0,0 +1,175 @@
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Packed;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Codecs.Compressing
+{
+    /// <summary>
+    /// In-memory reader for the stored fields index. The constructor decodes the
+    /// whole index into per-block arrays; <c>getStartPointer</c> then maps a document
+    /// ID to the start pointer of the chunk containing it with two binary searches
+    /// (first the block, then the chunk inside that block).
+    /// </summary>
+    public sealed class CompressingStoredFieldsIndexReader: ICloneable //Closable??
+    {
+        private readonly int maxDoc;
+        private readonly int[] docBases; // first doc ID of each block
+        private readonly long[] startPointers; // base start pointer of each block
+        private readonly int[] avgChunkDocs; // per-block multiplier used to predict a chunk's doc base
+        private readonly long[] avgChunkSizes; // per-block multiplier used to predict a chunk's start pointer
+        private readonly PackedInts.Reader[] docBasesDeltas; // delta from the avg
+        private readonly PackedInts.Reader[] startPointersDeltas; // delta from the avg
+
+        // Null on instances produced by clone(); only the original owns the input.
+        private readonly IndexInput fieldsIndexIn;
+
+        /// <summary>
+        /// Zig-zag decoding: the lowest bit of <paramref name="n"/> carries the sign,
+        /// the remaining bits (shifted right by one) carry the magnitude.
+        /// </summary>
+        static long MoveLowOrderBitToSign(long n)
+        {
+            return ((Number.URShift(n, 1) ^ -(n & 1)));
+        }
+
+        /// <summary>
+        /// Loads the complete index from <paramref name="fieldsIndexIn"/>.
+        /// The stream is read as: one VInt packed-ints version, then a sequence of
+        /// blocks, each introduced by its chunk count; a chunk count of 0 ends the
+        /// sequence.
+        /// </summary>
+        /// <param name="fieldsIndexIn">Input to decode; it is retained and closed by <c>close()</c>.
+        /// NOTE(review): presumably already positioned past any codec header - confirm against the caller.</param>
+        /// <param name="si">Segment whose <c>DocCount</c> bounds the valid document IDs.</param>
+        /// <exception cref="CorruptIndexException">A block declares more than 32 bits per doc base
+        /// or more than 64 bits per start pointer.</exception>
+        public CompressingStoredFieldsIndexReader(IndexInput fieldsIndexIn, SegmentInfo si)
+        {
+            this.fieldsIndexIn = fieldsIndexIn;
+            maxDoc = si.DocCount;
+
+            // Temporary, growable arrays; trimmed to blockCount once the input is exhausted.
+            int[] docBases = new int[16];
+            long[] startPointers = new long[16];
+            int[] avgChunkDocs = new int[16];
+            long[] avgChunkSizes = new long[16];
+            PackedInts.Reader[] docBasesDeltas = new PackedInts.Reader[16];
+            PackedInts.Reader[] startPointersDeltas = new PackedInts.Reader[16];
+
+            int packedIntsVersion = fieldsIndexIn.ReadVInt();
+
+            int blockCount = 0;
+
+            for (;;)
+            {
+                int numChunks = fieldsIndexIn.ReadVInt();
+                if (numChunks == 0)
+                {
+                    break; // end-of-index marker
+                }
+
+                if (blockCount == docBases.Length)
+                {
+                    // All six arrays are indexed by block, so they grow together.
+                    int newSize = ArrayUtil.Oversize(blockCount + 1, 8);
+                    docBases = Arrays.CopyOf(docBases, newSize);
+                    startPointers = Arrays.CopyOf(startPointers, newSize);
+                    avgChunkDocs = Arrays.CopyOf(avgChunkDocs, newSize);
+                    avgChunkSizes = Arrays.CopyOf(avgChunkSizes, newSize);
+                    docBasesDeltas = Arrays.CopyOf(docBasesDeltas, newSize);
+                    startPointersDeltas = Arrays.CopyOf(startPointersDeltas, newSize);
+                }
+
+                // doc bases
+                docBases[blockCount] = fieldsIndexIn.ReadVInt();
+                avgChunkDocs[blockCount] = fieldsIndexIn.ReadVInt();
+                int bitsPerDocBase = fieldsIndexIn.ReadVInt();
+                if (bitsPerDocBase > 32)
+                {
+                    throw new CorruptIndexException("Corrupted");
+                }
+                docBasesDeltas[blockCount] = (PackedInts.Reader)PackedInts.GetReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerDocBase);
+
+                // start pointers
+                startPointers[blockCount] = fieldsIndexIn.ReadVLong();
+                avgChunkSizes[blockCount] = fieldsIndexIn.ReadVLong();
+                int bitsPerStartPointer = fieldsIndexIn.ReadVInt();
+                if (bitsPerStartPointer > 64)
+                {
+                    throw new CorruptIndexException("Corrupted");
+                }
+                startPointersDeltas[blockCount] = (PackedInts.Reader)PackedInts.GetReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerStartPointer);
+
+                ++blockCount;
+            }
+
+            this.docBases = Arrays.CopyOf(docBases, blockCount);
+            this.startPointers = Arrays.CopyOf(startPointers, blockCount);
+            this.avgChunkDocs = Arrays.CopyOf(avgChunkDocs, blockCount);
+            this.avgChunkSizes = Arrays.CopyOf(avgChunkSizes, blockCount);
+            this.docBasesDeltas = Arrays.CopyOf(docBasesDeltas, blockCount);
+            this.startPointersDeltas = Arrays.CopyOf(startPointersDeltas, blockCount);
+        }
+
+        /// <summary>
+        /// Copy constructor used by <c>clone()</c>: shares the decoded arrays with
+        /// <paramref name="other"/> but does not take the underlying input.
+        /// </summary>
+        private CompressingStoredFieldsIndexReader(CompressingStoredFieldsIndexReader other)
+        {
+            this.fieldsIndexIn = null;
+            this.maxDoc = other.maxDoc;
+            this.docBases = other.docBases;
+            this.startPointers = other.startPointers;
+            this.avgChunkDocs = other.avgChunkDocs;
+            this.avgChunkSizes = other.avgChunkSizes;
+            this.docBasesDeltas = other.docBasesDeltas;
+            this.startPointersDeltas = other.startPointersDeltas;
+        }
+
+        /// <summary>
+        /// Binary search over <c>docBases</c>. Returns the index of the entry equal to
+        /// <paramref name="docID"/>, otherwise the index just below the insertion
+        /// point (which is -1 when every entry is greater, or when there are no blocks).
+        /// Assumes <c>docBases</c> is sorted ascending - TODO confirm against the writer.
+        /// </summary>
+        private int Block(int docID)
+        {
+            int lo = 0, hi = docBases.Length - 1;
+            while (lo <= hi)
+            {
+                int mid = Number.URShift(lo + hi, 1);
+                int midValue = docBases[mid];
+                if (midValue == docID)
+                {
+                    return mid;
+                }
+                else if (midValue < docID)
+                {
+                    lo = mid + 1;
+                }
+                else
+                {
+                    hi = mid - 1;
+                }
+            }
+            return hi;
+        }
+
+        /// <summary>
+        /// Doc base of a chunk relative to its block: the predicted value
+        /// (<c>avgChunkDocs * relativeChunk</c>) corrected by the stored signed delta.
+        /// </summary>
+        private int relativeDocBase(int block, int relativeChunk)
+        {
+            int expected = avgChunkDocs[block] * relativeChunk;
+            long delta = MoveLowOrderBitToSign(docBasesDeltas[block].Get(relativeChunk));
+            return expected + (int) delta;
+        }
+
+        /// <summary>
+        /// Start pointer of a chunk relative to its block: the predicted value
+        /// (<c>avgChunkSizes * relativeChunk</c>) corrected by the stored signed delta.
+        /// </summary>
+        private long relativeStartPointer(int block, int relativeChunk)
+        {
+            long expected = avgChunkSizes[block] * relativeChunk;
+            long delta = MoveLowOrderBitToSign(startPointersDeltas[block].Get(relativeChunk));
+            return expected + delta;
+        }
+
+        /// <summary>
+        /// Binary search, inside <paramref name="block"/>, for the chunk whose relative
+        /// doc base equals <paramref name="relativeDoc"/>; when there is no exact match
+        /// the index just below the insertion point is returned.
+        /// </summary>
+        private int relativeChunk(int block, int relativeDoc)
+        {
+            int lo = 0, hi = docBasesDeltas[block].Size() - 1;
+            while (lo <= hi)
+            {
+                int mid = Number.URShift(lo + hi, 1);
+                int midValue = relativeDocBase(block, mid);
+                if (midValue == relativeDoc)
+                {
+                    return mid;
+                }
+                else if (midValue < relativeDoc)
+                {
+                    lo = mid + 1;
+                }
+                else
+                {
+                    hi = mid - 1;
+                }
+            }
+            return hi;
+        }
+
+        /// <summary>
+        /// Returns the start pointer of the chunk that contains <paramref name="docID"/>.
+        /// </summary>
+        /// <remarks>
+        /// Exposed as <c>internal</c>: nothing inside this class calls it, so a private
+        /// method could never be reached. NOTE(review): presumably consumed by the
+        /// stored fields reader in this assembly - confirm.
+        /// </remarks>
+        /// <exception cref="ArgumentException"><paramref name="docID"/> is negative or not below <c>maxDoc</c>.</exception>
+        internal long getStartPointer(int docID)
+        {
+            if (docID < 0 || docID >= maxDoc)
+            {
+                throw new ArgumentException("docID out of range [0-" + maxDoc + "]: " + docID);
+            }
+            int block = Block(docID);
+            int relativeChunk = this.relativeChunk(block, docID - docBases[block]);
+            return startPointers[block] + relativeStartPointer(block, relativeChunk);
+        }
+
+        /// <summary>
+        /// Returns a reader that shares this reader's decoded data but holds no input.
+        /// An instance that already holds no input is returned as-is.
+        /// </summary>
+        // No 'override': System.Object declares no virtual clone() to override.
+        public CompressingStoredFieldsIndexReader clone()
+        {
+            if (fieldsIndexIn == null)
+            {
+                return this;
+            }
+            else
+            {
+                return new CompressingStoredFieldsIndexReader(this);
+            }
+        }
+
+        /// <summary>
+        /// <see cref="ICloneable"/> implementation; forwards to <c>clone()</c>.
+        /// </summary>
+        object ICloneable.Clone()
+        {
+            return clone();
+        }
+
+        /// <summary>
+        /// Closes the underlying index input through <c>IOUtils.Close</c>.
+        /// </summary>
+        // No 'override': System.Object declares no virtual close() to override.
+        public void close()
+        {
+            IOUtils.Close(fieldsIndexIn);
+        }
+
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/64c13f3c/src/core/Codecs/Compressing/CompressingStoredFieldsReader.cs
----------------------------------------------------------------------
diff --git a/src/core/Codecs/Compressing/CompressingStoredFieldsReader.cs b/src/core/Codecs/Compressing/CompressingStoredFieldsReader.cs
index ad909ce..9c55e07 100644
--- a/src/core/Codecs/Compressing/CompressingStoredFieldsReader.cs
+++ b/src/core/Codecs/Compressing/CompressingStoredFieldsReader.cs
@@ -59,7 +59,7 @@ public sealed class CompressingStoredFieldsReader: StoredFieldsReader {
     string segment = si.name;
     bool success = false;
     fieldInfos = fn;
-    numDocs = si.getDocCount();
+    numDocs = si.DocCount;
     IndexInput indexStream = null;
     try {
       fieldsStream = d.OpenInput(IndexFileNames.SegmentFileName(segment, segmentSuffix, FIELDS_EXTENSION),
context);

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/64c13f3c/src/core/Codecs/Compressing/CompressingStoredFieldsWriter.cs
----------------------------------------------------------------------
diff --git a/src/core/Codecs/Compressing/CompressingStoredFieldsWriter.cs b/src/core/Codecs/Compressing/CompressingStoredFieldsWriter.cs
index 54882fd..1543196 100644
--- a/src/core/Codecs/Compressing/CompressingStoredFieldsWriter.cs
+++ b/src/core/Codecs/Compressing/CompressingStoredFieldsWriter.cs
@@ -1,413 +1,391 @@
-package org.apache.lucene.codecs.compressing;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.FIELDS_EXTENSION;
-import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION;
-
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.StoredFieldsReader;
-import org.apache.lucene.codecs.StoredFieldsWriter;
-import org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader.ChunkIterator;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.FieldInfos;
-import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.index.MergeState;
-import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.index.SegmentReader;
-import org.apache.lucene.store.DataOutput;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.packed.PackedInts;
-
-/**
- * {@link StoredFieldsWriter} impl for {@link CompressingStoredFieldsFormat}.
- * @lucene.experimental
- */
-public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
-
-  // hard limit on the maximum number of documents per chunk
-  static final int MAX_DOCUMENTS_PER_CHUNK = 128;
-
-  static final int         STRING = 0x00;
-  static final int       BYTE_ARR = 0x01;
-  static final int    NUMERIC_INT = 0x02;
-  static final int  NUMERIC_FLOAT = 0x03;
-  static final int   NUMERIC_LONG = 0x04;
-  static final int NUMERIC_DOUBLE = 0x05;
-
-  static final int TYPE_BITS = PackedInts.bitsRequired(NUMERIC_DOUBLE);
-  static final int TYPE_MASK = (int) PackedInts.maxValue(TYPE_BITS);
-
-  static final String CODEC_SFX_IDX = "Index";
-  static final String CODEC_SFX_DAT = "Data";
-  static final int VERSION_START = 0;
-  static final int VERSION_CURRENT = VERSION_START;
-
-  private final Directory directory;
-  private final String segment;
-  private final String segmentSuffix;
-  private CompressingStoredFieldsIndexWriter indexWriter;
-  private IndexOutput fieldsStream;
-
-  private final CompressionMode compressionMode;
-  private final Compressor compressor;
-  private final int chunkSize;
-
-  private final GrowableByteArrayDataOutput bufferedDocs;
-  private int[] numStoredFields; // number of stored fields
-  private int[] endOffsets; // end offsets in bufferedDocs
-  private int docBase; // doc ID at the beginning of the chunk
-  private int numBufferedDocs; // docBase + numBufferedDocs == current doc ID
-
-  /** Sole constructor. */
-  public CompressingStoredFieldsWriter(Directory directory, SegmentInfo si, String segmentSuffix,
IOContext context,
-      String formatName, CompressionMode compressionMode, int chunkSize) throws IOException
{
-    assert directory != null;
-    this.directory = directory;
-    this.segment = si.name;
-    this.segmentSuffix = segmentSuffix;
-    this.compressionMode = compressionMode;
-    this.compressor = compressionMode.newCompressor();
-    this.chunkSize = chunkSize;
-    this.docBase = 0;
-    this.bufferedDocs = new GrowableByteArrayDataOutput(chunkSize);
-    this.numStoredFields = new int[16];
-    this.endOffsets = new int[16];
-    this.numBufferedDocs = 0;
-
-    boolean success = false;
-    IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment,
segmentSuffix, FIELDS_INDEX_EXTENSION), context);
-    try {
-      fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix,
FIELDS_EXTENSION), context);
-
-      final String codecNameIdx = formatName + CODEC_SFX_IDX;
-      final String codecNameDat = formatName + CODEC_SFX_DAT;
-      CodecUtil.writeHeader(indexStream, codecNameIdx, VERSION_CURRENT);
-      CodecUtil.writeHeader(fieldsStream, codecNameDat, VERSION_CURRENT);
-      assert CodecUtil.headerLength(codecNameDat) == fieldsStream.getFilePointer();
-      assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
-
-      indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
-      indexStream = null;
-
-      fieldsStream.writeVInt(PackedInts.VERSION_CURRENT);
-
-      success = true;
-    } finally {
-      if (!success) {
-        IOUtils.closeWhileHandlingException(indexStream);
-        abort();
-      }
-    }
-  }
-
-  @Override
-  public void close() throws IOException {
-    try {
-      IOUtils.close(fieldsStream, indexWriter);
-    } finally {
-      fieldsStream = null;
-      indexWriter = null;
-    }
-  }
-
-  @Override
-  public void startDocument(int numStoredFields) throws IOException {
-    if (numBufferedDocs == this.numStoredFields.length) {
-      final int newLength = ArrayUtil.oversize(numBufferedDocs + 1, 4);
-      this.numStoredFields = Arrays.copyOf(this.numStoredFields, newLength);
-      endOffsets = Arrays.copyOf(endOffsets, newLength);
-    }
-    this.numStoredFields[numBufferedDocs] = numStoredFields;
-    ++numBufferedDocs;
-  }
-
-  @Override
-  public void finishDocument() throws IOException {
-    endOffsets[numBufferedDocs - 1] = bufferedDocs.length;
-    if (triggerFlush()) {
-      flush();
-    }
-  }
-
-  private static void saveInts(int[] values, int length, DataOutput out) throws IOException
{
-    assert length > 0;
-    if (length == 1) {
-      out.writeVInt(values[0]);
-    } else {
-      boolean allEqual = true;
-      for (int i = 1; i < length; ++i) {
-        if (values[i] != values[0]) {
-          allEqual = false;
-          break;
-        }
-      }
-      if (allEqual) {
-        out.writeVInt(0);
-        out.writeVInt(values[0]);
-      } else {
-        long max = 0;
-        for (int i = 0; i < length; ++i) {
-          max |= values[i];
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Packed;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Codecs.Compressing
+{
+    public sealed class CompressingStoredFieldsWriter : StoredFieldsWriter
+    {
+        static readonly int MAX_DOCUMENTS_PER_CHUNK = 128;
+        static readonly int STRING = 0x00;
+        static readonly int BYTE_ARR = 0x01;
+        static readonly int NUMERIC_INT = 0x02;
+        static readonly int NUMERIC_FLOAT = 0x03;
+        static readonly int NUMERIC_LONG = 0x04;
+        static readonly int NUMERIC_DOUBLE = 0x05;
+
+        static readonly int TYPE_BITS = PackedInts.bitsRequired(NUMERIC_DOUBLE);
+        static readonly int TYPE_MASK = (int)PackedInts.maxValue(TYPE_BITS);
+
+        static readonly String CODEC_SFX_IDX = "Index";
+        static readonly String CODEC_SFX_DAT = "Data";
+        static readonly int VERSION_START = 0;
+        static readonly int VERSION_CURRENT = VERSION_START;
+
+        private Directory directory;
+        private string segment;
+        private string segmentSuffix;
+        private CompressingStoredFieldsIndexWriter indexWriter;
+        private IndexOutput fieldsStream;
+
+        private CompressionMode compressionMode;
+        private Compressor compressor;
+        private int chunkSize;
+
+        private GrowableByteArrayDataOutput bufferedDocs;
+        private int[] numStoredFields; // number of stored fields
+        private int[] endOffsets; // end offsets in bufferedDocs
+        private int docBase; // doc ID at the beginning of the chunk
+        private int numBufferedDocs; // docBase + numBufferedDocs == current doc ID
+
+        /// <summary>
+        /// Opens the fields index and fields data outputs for the segment, writes
+        /// both codec headers plus the packed-ints version, and prepares empty
+        /// buffers for the first chunk. If anything fails after the index output
+        /// has been created, that output is closed and <c>abort()</c> is invoked.
+        /// </summary>
+        /// <param name="directory">Directory the two outputs are created in.</param>
+        /// <param name="si">Segment info; only its name is read here.</param>
+        /// <param name="segmentSuffix">Suffix used when building both file names.</param>
+        /// <param name="context">I/O context passed to <c>CreateOutput</c>.</param>
+        /// <param name="formatName">Prefix of the codec names written into the headers.</param>
+        /// <param name="compressionMode">Supplies the compressor used for chunks.</param>
+        /// <param name="chunkSize">Initial buffer size and flush threshold.</param>
+        public CompressingStoredFieldsWriter(Directory directory, SegmentInfo si, string segmentSuffix, IOContext context, string formatName, CompressionMode compressionMode, int chunkSize)
+        {
+            this.directory = directory;
+            this.segment = si.name;
+            this.segmentSuffix = segmentSuffix;
+            this.compressionMode = compressionMode;
+            this.compressor = compressionMode.newCompressor();
+            this.chunkSize = chunkSize;
+            this.docBase = 0;
+            this.bufferedDocs = new GrowableByteArrayDataOutput(chunkSize);
+            this.numStoredFields = new int[16];
+            this.endOffsets = new int[16];
+            this.numBufferedDocs = 0;
+
+            bool initialized = false;
+            IndexOutput indexOut = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION), context);
+            try
+            {
+                fieldsStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, segmentSuffix, FIELDS_EXTENSION), context);
+
+                string indexCodecName = formatName + CODEC_SFX_IDX;
+                string dataCodecName = formatName + CODEC_SFX_DAT;
+                CodecUtil.WriteHeader(indexOut, indexCodecName, VERSION_CURRENT);
+                CodecUtil.WriteHeader(fieldsStream, dataCodecName, VERSION_CURRENT);
+
+                // The index writer takes over the output; clearing the local keeps
+                // the failure path below from closing it a second time.
+                indexWriter = new CompressingStoredFieldsIndexWriter(indexOut);
+                indexOut = null;
+
+                fieldsStream.WriteVInt(PackedInts.VERSION_CURRENT);
+
+                initialized = true;
+            }
+            finally
+            {
+                if (!initialized)
+                {
+                    IOUtils.CloseWhileHandlingException(indexOut);
+                    abort();
+                }
+            }
         }
-        final int bitsRequired = PackedInts.bitsRequired(max);
-        out.writeVInt(bitsRequired);
-        final PackedInts.Writer w = PackedInts.getWriterNoHeader(out, PackedInts.Format.PACKED,
length, bitsRequired, 1);
-        for (int i = 0; i < length; ++i) {
-          w.add(values[i]);
+
+        /// <summary>
+        /// Closes the fields stream and the index writer, then drops both
+        /// references whether or not closing succeeded.
+        /// </summary>
+        public override void Close()
+        {
+            try
+            {
+                IOUtils.Close(fieldsStream, indexWriter);
+            }
+            finally
+            {
+                // Cleared even when Close throws.
+                indexWriter = null;
+                fieldsStream = null;
+            }
         }
-        w.finish();
-      }
-    }
-  }
 
-  private void writeHeader(int docBase, int numBufferedDocs, int[] numStoredFields, int[]
lengths) throws IOException {
-    // save docBase and numBufferedDocs
-    fieldsStream.writeVInt(docBase);
-    fieldsStream.writeVInt(numBufferedDocs);
+        /// <summary>
+        /// Registers a new buffered document and records how many stored fields
+        /// it has, growing the per-document arrays when they are full.
+        /// </summary>
+        /// <param name="numStoredFields">Number of stored fields of the document.</param>
+        public override void StartDocument(int numStoredFields)
+        {
+            int slot = numBufferedDocs;
+            if (slot == this.numStoredFields.Length)
+            {
+                // numStoredFields and endOffsets are indexed alike, so they grow together.
+                int grownLength = ArrayUtil.Oversize(slot + 1, 4);
+                this.numStoredFields = Arrays.CopyOf(this.numStoredFields, grownLength);
+                endOffsets = Arrays.CopyOf(endOffsets, grownLength);
+            }
+            this.numStoredFields[slot] = numStoredFields;
+            numBufferedDocs = slot + 1;
+        }
 
-    // save numStoredFields
-    saveInts(numStoredFields, numBufferedDocs, fieldsStream);
+        /// <summary>
+        /// Records where the current document ends in the buffer and flushes the
+        /// chunk when <c>TriggerFlush()</c> says so.
+        /// </summary>
+        public override void FinishDocument()
+        {
+            int lastDoc = numBufferedDocs - 1;
+            endOffsets[lastDoc] = bufferedDocs.Length;
+            if (!TriggerFlush())
+            {
+                return;
+            }
+            Flush();
+        }
 
-    // save lengths
-    saveInts(lengths, numBufferedDocs, fieldsStream);
-  }
+        /// <summary>
+        /// Writes the first <paramref name="length"/> entries of <paramref name="values"/>
+        /// to <paramref name="output"/> using one of three layouts:
+        /// a single VInt when <paramref name="length"/> is 1; VInt 0 followed by the
+        /// shared value when all entries are equal; otherwise the VInt bit width
+        /// followed by the entries packed at that width.
+        /// </summary>
+        /// <param name="values">Values to write. NOTE(review): presumably non-negative - a negative
+        /// entry would sign-extend into the OR-ed maximum below; confirm with callers.</param>
+        /// <param name="length">Number of leading entries to write. NOTE(review): callers are
+        /// expected to pass at least 1; <c>values[0]</c> is read unconditionally.</param>
+        /// <param name="output">Destination.</param>
+        private static void saveInts(int[] values, int length, DataOutput output)
+        {
+            if (length == 1)
+            {
+                output.WriteVInt(values[0]);
+                return;
+            }
+
+            bool allEqual = true;
+            for (int i = 1; i < length; ++i)
+            {
+                if (values[i] != values[0])
+                {
+                    allEqual = false;
+                    break; // the first mismatch settles it; no need to scan the rest
+                }
+            }
+
+            if (allEqual)
+            {
+                output.WriteVInt(0);
+                output.WriteVInt(values[0]);
+                return;
+            }
+
+            // OR-ing every value gives a number that needs as many bits as the widest entry.
+            long max = 0;
+            for (int i = 0; i < length; ++i)
+            {
+                max |= values[i];
+            }
+            int bitsRequired = PackedInts.BitsRequired(max);
+            output.WriteVInt(bitsRequired);
+            PackedInts.Writer w = PackedInts.GetWriterNoHeader(output, PackedInts.Format.PACKED, length, bitsRequired, 1);
+            for (int i = 0; i < length; ++i)
+            {
+                w.Add(values[i]);
+            }
+            w.Finish();
+        }
 
-  private boolean triggerFlush() {
-    return bufferedDocs.length >= chunkSize || // chunks of at least chunkSize bytes
-        numBufferedDocs >= MAX_DOCUMENTS_PER_CHUNK;
-  }
+        private void WriteHeader(int docBase, int numBufferedDocs, int[] numStoredFields,
int[] lengths)
+        {
+            // save docBase and numBufferedDocs
+            fieldsStream.WriteVInt(docBase);
+            fieldsStream.WriteVInt(numBufferedDocs);
 
-  private void flush() throws IOException {
-    indexWriter.writeIndex(numBufferedDocs, fieldsStream.getFilePointer());
+            // save numStoredFields
+            saveInts(numStoredFields, numBufferedDocs, fieldsStream);
 
-    // transform end offsets into lengths
-    final int[] lengths = endOffsets;
-    for (int i = numBufferedDocs - 1; i > 0; --i) {
-      lengths[i] = endOffsets[i] - endOffsets[i - 1];
-      assert lengths[i] >= 0;
-    }
-    writeHeader(docBase, numBufferedDocs, numStoredFields, lengths);
-
-    // compress stored fields to fieldsStream
-    compressor.compress(bufferedDocs.bytes, 0, bufferedDocs.length, fieldsStream);
-
-    // reset
-    docBase += numBufferedDocs;
-    numBufferedDocs = 0;
-    bufferedDocs.length = 0;
-  }
-
-  @Override
-  public void writeField(FieldInfo info, IndexableField field)
-      throws IOException {
-    int bits = 0;
-    final BytesRef bytes;
-    final String string;
-
-    Number number = field.numericValue();
-    if (number != null) {
-      if (number instanceof Byte || number instanceof Short || number instanceof Integer)
{
-        bits = NUMERIC_INT;
-      } else if (number instanceof Long) {
-        bits = NUMERIC_LONG;
-      } else if (number instanceof Float) {
-        bits = NUMERIC_FLOAT;
-      } else if (number instanceof Double) {
-        bits = NUMERIC_DOUBLE;
-      } else {
-        throw new IllegalArgumentException("cannot store numeric type " + number.getClass());
-      }
-      string = null;
-      bytes = null;
-    } else {
-      bytes = field.binaryValue();
-      if (bytes != null) {
-        bits = BYTE_ARR;
-        string = null;
-      } else {
-        bits = STRING;
-        string = field.stringValue();
-        if (string == null) {
-          throw new IllegalArgumentException("field " + field.name() + " is stored but does
not have binaryValue, stringValue nor numericValue");
+            // save lengths
+            saveInts(lengths, numBufferedDocs, fieldsStream);
         }
-      }
-    }
 
-    final long infoAndBits = (((long) info.number) << TYPE_BITS) | bits;
-    bufferedDocs.writeVLong(infoAndBits);
-
-    if (bytes != null) {
-      bufferedDocs.writeVInt(bytes.length);
-      bufferedDocs.writeBytes(bytes.bytes, bytes.offset, bytes.length);
-    } else if (string != null) {
-      bufferedDocs.writeString(field.stringValue());
-    } else {
-      if (number instanceof Byte || number instanceof Short || number instanceof Integer)
{
-        bufferedDocs.writeInt(number.intValue());
-      } else if (number instanceof Long) {
-        bufferedDocs.writeLong(number.longValue());
-      } else if (number instanceof Float) {
-        bufferedDocs.writeInt(Float.floatToIntBits(number.floatValue()));
-      } else if (number instanceof Double) {
-        bufferedDocs.writeLong(Double.doubleToLongBits(number.doubleValue()));
-      } else {
-        throw new AssertionError("Cannot get here");
-      }
-    }
-  }
-
-  @Override
-  public void abort() {
-    IOUtils.closeWhileHandlingException(this);
-    IOUtils.deleteFilesIgnoringExceptions(directory,
-        IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION),
-        IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION));
-  }
-
-  @Override
-  public void finish(FieldInfos fis, int numDocs) throws IOException {
-    if (numBufferedDocs > 0) {
-      flush();
-    } else {
-      assert bufferedDocs.length == 0;
-    }
-    if (docBase != numDocs) {
-      throw new RuntimeException("Wrote " + docBase + " docs, finish called with numDocs="
+ numDocs);
-    }
-    indexWriter.finish(numDocs);
-    assert bufferedDocs.length == 0;
-  }
-
-  @Override
-  public int merge(MergeState mergeState) throws IOException {
-    int docCount = 0;
-    int idx = 0;
-
-    for (AtomicReader reader : mergeState.readers) {
-      final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
-      CompressingStoredFieldsReader matchingFieldsReader = null;
-      if (matchingSegmentReader != null) {
-        final StoredFieldsReader fieldsReader = matchingSegmentReader.getFieldsReader();
-        // we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader
-        if (fieldsReader != null && fieldsReader instanceof CompressingStoredFieldsReader)
{
-          matchingFieldsReader = (CompressingStoredFieldsReader) fieldsReader;
+        /// <summary>
+        /// Tells whether the buffered chunk should be flushed: either the buffer
+        /// holds at least <c>chunkSize</c> bytes or the per-chunk document limit
+        /// has been reached.
+        /// </summary>
+        private bool TriggerFlush()
+        {
+            // chunks of at least chunkSize bytes
+            if (bufferedDocs.Length >= chunkSize)
+            {
+                return true;
+            }
+            return numBufferedDocs >= MAX_DOCUMENTS_PER_CHUNK;
         }
-      }
-
-      final int maxDoc = reader.maxDoc();
-      final Bits liveDocs = reader.getLiveDocs();
-
-      if (matchingFieldsReader == null) {
-        // naive merge...
-        for (int i = nextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = nextLiveDoc(i +
1, liveDocs, maxDoc)) {
-          Document doc = reader.document(i);
-          addDocument(doc, mergeState.fieldInfos);
-          ++docCount;
-          mergeState.checkAbort.work(300);
+
+        /// <summary>
+        /// Writes the buffered documents as one chunk: an index entry, the chunk
+        /// header, then the compressed document bytes. Afterwards the buffer is
+        /// emptied and <c>docBase</c> advances past the flushed documents.
+        /// </summary>
+        private void Flush()
+        {
+            long chunkStart = fieldsStream.FilePointer;
+            indexWriter.WriteIndex(numBufferedDocs, chunkStart);
+
+            // Turn end offsets into lengths in place. Walking from the last
+            // document down means each subtraction still sees the untouched end
+            // offset of the preceding document; entry 0 is already a length.
+            for (int doc = numBufferedDocs - 1; doc >= 1; --doc)
+            {
+                endOffsets[doc] -= endOffsets[doc - 1];
+            }
+
+            WriteHeader(docBase, numBufferedDocs, numStoredFields, endOffsets);
+
+            // compress stored fields to fieldsStream
+            compressor.Compress(bufferedDocs.Bytes, 0, bufferedDocs.Length, fieldsStream);
+
+            // start the next chunk right after the documents just written
+            docBase += numBufferedDocs;
+            numBufferedDocs = 0;
+            bufferedDocs.Length = 0;
         }
-      } else {
-        int docID = nextLiveDoc(0, liveDocs, maxDoc);
-        if (docID < maxDoc) {
-          // not all docs were deleted
-          final ChunkIterator it = matchingFieldsReader.chunkIterator(docID);
-          int[] startOffsets = new int[0];
-          do {
-            // go to the next chunk that contains docID
-            it.next(docID);
-            // transform lengths into offsets
-            if (startOffsets.length < it.chunkDocs) {
-              startOffsets = new int[ArrayUtil.oversize(it.chunkDocs, 4)];
+
+        public override void writeField(FieldInfo info, IndexableField field) // Buffers one stored field into bufferedDocs: a VLong header, then the value.
+        {
+          int bits = 0; // type tag; OR-ed into the header below
+          BytesRef bytes;
+          string str;
+
+          Number number = field.numericValue(); // NOTE(review): Number/instanceof look like untranslated Java — TODO confirm the C# equivalents
+          if (number != null) {
+            if (number instanceof Byte || number instanceof Short || number instanceof Integer)
{
+              bits = NUMERIC_INT;
+            } else if (number instanceof Long) {
+              bits = NUMERIC_LONG;
+            } else if (number instanceof Float) {
+              bits = NUMERIC_FLOAT;
+            } else if (number instanceof Double) {
+              bits = NUMERIC_DOUBLE;
+            } else {
+              throw new IllegalArgumentException("cannot store numeric type " + number.getClass());
+            }
+            str = null;
+            bytes = null;
+          } else { // not numeric: try the binary value first, then the string value
+            bytes = field.binaryValue();
+            if (bytes != null) {
+              bits = BYTE_ARR;
+              str = null;
+            } else {
+              bits = STRING;
+              str = field.stringValue();
+              if (str == null) {
+                throw new ArgumentException("field " + field.name() + " is stored but does
not have binaryValue, stringValue nor numericValue");
+              }
             }
-            for (int i = 1; i < it.chunkDocs; ++i) {
-              startOffsets[i] = startOffsets[i - 1] + it.lengths[i - 1];
+          }
+
+          long infoAndBits = (((long) info.number) << TYPE_BITS) | bits; // header = field number shifted left by TYPE_BITS, OR-ed with the type tag
+          bufferedDocs.WriteVLong(infoAndBits);
+
+          if (bytes != null) { // payload: VInt length + raw bytes, or a string, or the numeric value
+            bufferedDocs.WriteVInt(bytes.length);
+            bufferedDocs.WriteBytes(bytes.bytes, bytes.offset, bytes.length);
+          } else if (str != null) {
+            bufferedDocs.WriteString(field.stringValue()); // same call whose result is already held in str
+          } else {
+            if (number instanceof Byte || number instanceof Short || number instanceof Integer)
{
+              bufferedDocs.writeInt(number.intValue());
+            } else if (number instanceof Long) {
+              bufferedDocs.writeLong(number.longValue());
+            } else if (number instanceof Float) {
+              bufferedDocs.writeInt(Float.floatToIntBits(number.floatValue())); // float is written as its int bit pattern
+            } else if (number instanceof Double) {
+              bufferedDocs.writeLong(Double.doubleToLongBits(number.doubleValue())); // double is written as its long bit pattern
+            } else {
+              throw new AssertionError("Cannot get here"); // any other numeric type already threw in the type check above
             }
+          }
+        }
 
-            if (compressionMode == matchingFieldsReader.getCompressionMode() // same compression
mode
-                && numBufferedDocs == 0 // starting a new chunk
-                && startOffsets[it.chunkDocs - 1] < chunkSize // chunk is small
enough
-                && startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1]
>= chunkSize // chunk is large enough
-                && nextDeletedDoc(it.docBase, liveDocs, it.docBase + it.chunkDocs)
== it.docBase + it.chunkDocs) { // no deletion in the chunk
-              assert docID == it.docBase;
-
-              // no need to decompress, just copy data
-              indexWriter.writeIndex(it.chunkDocs, fieldsStream.getFilePointer());
-              writeHeader(this.docBase, it.chunkDocs, it.numStoredFields, it.lengths);
-              it.copyCompressedData(fieldsStream);
-              this.docBase += it.chunkDocs;
-              docID = nextLiveDoc(it.docBase + it.chunkDocs, liveDocs, maxDoc);
-              docCount += it.chunkDocs;
-              mergeState.checkAbort.work(300 * it.chunkDocs);
-            } else {
-              // decompress
-              it.decompress();
-              if (startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] != it.bytes.length)
{
-                throw new CorruptIndexException("Corrupted: expected chunk size=" + startOffsets[it.chunkDocs
- 1] + it.lengths[it.chunkDocs - 1] + ", got " + it.bytes.length);
+        /// <summary>
+        /// Abandons this writer: hands it to <c>IOUtils.CloseWhileHandlingException</c>, then asks
+        /// <c>IOUtils.DeleteFilesIgnoringExceptions</c> to remove the two segment files named with
+        /// <c>FIELDS_EXTENSION</c> and <c>FIELDS_INDEX_EXTENSION</c> from <c>directory</c>.
+        /// </summary>
+        public override void Abort()
+        {
+          IOUtils.CloseWhileHandlingException(this);
+
+          // Build both file names first so the delete call reads as a single step.
+          var fieldsFileName = IndexFileNames.SegmentFileName(segment, segmentSuffix, FIELDS_EXTENSION);
+          var fieldsIndexFileName = IndexFileNames.SegmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION);
+          IOUtils.DeleteFilesIgnoringExceptions(directory, fieldsFileName, fieldsIndexFileName);
+        }
+
+        /// <summary>
+        /// Completes the write: flushes any documents still buffered, checks that the number of
+        /// documents written so far (<c>docBase</c>) equals <paramref name="numDocs"/>, and then
+        /// finishes the index writer.
+        /// </summary>
+        /// <param name="fis">Field infos of the segment; not read by this method.</param>
+        /// <param name="numDocs">Number of documents the caller expects to have been written.</param>
+        public override void finish(FieldInfos fis, int numDocs)
+        {
+          // With nothing buffered, bufferedDocs is expected to be empty (bufferedDocs.length == 0).
+          bool hasPendingDocs = numBufferedDocs > 0;
+          if (hasPendingDocs)
+          {
+            Flush();
+          }
+
+          if (docBase == numDocs)
+          {
+            indexWriter.finish(numDocs);
+            return;
+          }
+
+          // NOTE(review): RuntimeException is a Java type name — confirm a .NET equivalent is in scope.
+          throw new RuntimeException("Wrote " + docBase + " docs, finish called with numDocs=" + numDocs);
+        }
+
+        /// <summary>
+        /// Merges the stored fields of every reader in <paramref name="mergeState"/> into this writer.
+        /// Readers whose stored-fields reader is a <see cref="CompressingStoredFieldsReader"/> are
+        /// processed chunk by chunk: a chunk is copied in compressed form when the compression mode
+        /// matches, nothing is currently buffered, its size satisfies the chunk-size bounds and it
+        /// contains no deleted document; otherwise it is decompressed and its live documents are
+        /// re-buffered one at a time. All other readers are merged document by document.
+        /// </summary>
+        /// <param name="mergeState">Supplies the readers, matching segment readers, field infos and abort checker.</param>
+        /// <returns>The number of documents written.</returns>
+        /// <exception cref="CorruptIndexException">
+        /// A decompressed chunk's byte length differs from the length implied by its per-document lengths.
+        /// </exception>
+        public override int Merge(MergeState mergeState)
+        {
+          int docCount = 0;
+          int idx = 0;
+
+          foreach (AtomicReader reader in mergeState.readers)
+          {
+            SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
+            CompressingStoredFieldsReader matchingFieldsReader = null;
+            if (matchingSegmentReader != null)
+            {
+              StoredFieldsReader fieldsReader = matchingSegmentReader.FieldsReader;
+              // we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader
+              if (fieldsReader != null && fieldsReader is CompressingStoredFieldsReader)
+              {
+                matchingFieldsReader = (CompressingStoredFieldsReader) fieldsReader;
               }
-              // copy non-deleted docs
-              for (; docID < it.docBase + it.chunkDocs; docID = nextLiveDoc(docID + 1,
liveDocs, maxDoc)) {
-                final int diff = docID - it.docBase;
-                startDocument(it.numStoredFields[diff]);
-                bufferedDocs.writeBytes(it.bytes.bytes, it.bytes.offset + startOffsets[diff],
it.lengths[diff]);
-                finishDocument();
+            }
+
+            int maxDoc = reader.MaxDoc;
+            IBits liveDocs = reader.LiveDocs;
+
+            if (matchingFieldsReader == null) {
+              // naive merge...
+              for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc)) {
+                Document doc = reader.Document(i);
+                AddDocument(doc, mergeState.fieldInfos);
                 ++docCount;
-                mergeState.checkAbort.work(300);
+                mergeState.checkAbort.Work(300);
+              }
+            } else {
+              int docID = NextLiveDoc(0, liveDocs, maxDoc);
+              if (docID < maxDoc) {
+                // not all docs were deleted
+                ChunkIterator it = matchingFieldsReader.ChunkIterator(docID);
+                int[] startOffsets = new int[0];
+                do {
+                  // go to the next chunk that contains docID
+                  it.next(docID);
+                  // transform lengths into offsets
+                  if (startOffsets.Length < it.chunkDocs) {
+                    startOffsets = new int[ArrayUtil.Oversize(it.chunkDocs, 4)];
+                  }
+                  for (int i = 1; i < it.chunkDocs; ++i) {
+                    startOffsets[i] = startOffsets[i - 1] + it.lengths[i - 1];
+                  }
+
+                  if (compressionMode == matchingFieldsReader.getCompressionMode() // same compression mode
+                      && numBufferedDocs == 0 // starting a new chunk
+                      && startOffsets[it.chunkDocs - 1] < chunkSize // chunk is small enough
+                      && startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] >= chunkSize // chunk is large enough
+                      && nextDeletedDoc(it.docBase, liveDocs, it.docBase + it.chunkDocs) == it.docBase + it.chunkDocs) { // no deletion in the chunk
+
+                    // no need to decompress, just copy data
+                    indexWriter.writeIndex(it.chunkDocs, fieldsStream.FilePointer);
+                    WriteHeader(this.docBase, it.chunkDocs, it.numStoredFields, it.lengths);
+                    it.copyCompressedData(fieldsStream);
+                    this.docBase += it.chunkDocs;
+                    docID = NextLiveDoc(it.docBase + it.chunkDocs, liveDocs, maxDoc);
+                    docCount += it.chunkDocs;
+                    mergeState.checkAbort.Work(300 * it.chunkDocs);
+                  } else {
+                    // decompress
+                    it.decompress();
+                    if (startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] != it.bytes.length) {
+                      // Parenthesised so the two ints are added before being appended to the message;
+                      // without the parentheses they are concatenated as text (10 and 5 print as "105").
+                      throw new CorruptIndexException("Corrupted: expected chunk size=" + (startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1]) + ", got " + it.bytes.length);
+                    }
+                    // copy non-deleted docs
+                    for (; docID < it.docBase + it.chunkDocs; docID = NextLiveDoc(docID + 1, liveDocs, maxDoc)) {
+                      int diff = docID - it.docBase;
+                      StartDocument(it.numStoredFields[diff]);
+                      bufferedDocs.WriteBytes(it.bytes.bytes, it.bytes.offset + startOffsets[diff], it.lengths[diff]);
+                      FinishDocument();
+                      ++docCount;
+                      mergeState.checkAbort.Work(300);
+                    }
+                  }
+                } while (docID < maxDoc);
               }
             }
-          } while (docID < maxDoc);
+          }
+
+          Finish(mergeState.fieldInfos, docCount);
+          return docCount;
         }
-      }
-    }
-    finish(mergeState.fieldInfos, docCount);
-    return docCount;
-  }
 
-  private static int nextLiveDoc(int doc, Bits liveDocs, int maxDoc) {
-    if (liveDocs == null) {
-      return doc;
-    }
-    while (doc < maxDoc && !liveDocs.get(doc)) {
-      ++doc;
-    }
-    return doc;
-  }
+        /// <summary>
+        /// Returns the first document number at or after <paramref name="doc"/> whose bit is set in
+        /// <paramref name="liveDocs"/>. When <paramref name="liveDocs"/> is null, <paramref name="doc"/>
+        /// is returned unchanged; when no such document exists below <paramref name="maxDoc"/>, the
+        /// scan stops at <paramref name="maxDoc"/>.
+        /// </summary>
+        private static int NextLiveDoc(int doc, IBits liveDocs, int maxDoc)
+        {
+            int candidate = doc;
+            if (liveDocs != null)
+            {
+                for (; candidate < maxDoc; candidate++)
+                {
+                    if (liveDocs[candidate])
+                    {
+                        break;
+                    }
+                }
+            }
+            return candidate;
+        }
 
-  private static int nextDeletedDoc(int doc, Bits liveDocs, int maxDoc) {
-    if (liveDocs == null) {
-      return maxDoc;
-    }
-    while (doc < maxDoc && liveDocs.get(doc)) {
-      ++doc;
-    }
-    return doc;
-  }
+        /// <summary>
+        /// Returns the first document number at or after <paramref name="doc"/> whose bit is NOT set
+        /// in <paramref name="liveDocs"/>, scanning up to (but excluding) <paramref name="maxDoc"/>.
+        /// </summary>
+        /// <param name="doc">Document number to start scanning from.</param>
+        /// <param name="liveDocs">Live-document bits; null is treated as "nothing deleted".</param>
+        /// <param name="maxDoc">Exclusive upper bound of the scan.</param>
+        /// <returns>
+        /// <paramref name="maxDoc"/> when <paramref name="liveDocs"/> is null; otherwise the position
+        /// where the scan stopped.
+        /// </returns>
+        private static int nextDeletedDoc(int doc, IBits liveDocs, int maxDoc)
+        {
+            // Parameter is IBits (not Bits) to match NextLiveDoc and the IBits local passed in by Merge.
+            if (liveDocs == null)
+            {
+                return maxDoc;
+            }
+            while (doc < maxDoc && liveDocs[doc])
+            {
+                ++doc;
+            }
+            return doc;
+        }
 
+    }
 }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/64c13f3c/src/core/Codecs/Compressing/Compressor.cs
----------------------------------------------------------------------
diff --git a/src/core/Codecs/Compressing/Compressor.cs b/src/core/Codecs/Compressing/Compressor.cs
index 48fdb74..00c0053 100644
--- a/src/core/Codecs/Compressing/Compressor.cs
+++ b/src/core/Codecs/Compressing/Compressor.cs
@@ -32,7 +32,7 @@ namespace Lucene.Net.Codecs.Compressing
          * compressor to add all necessary information so that a {@link Decompressor}
          * will know when to stop decompressing bytes from the stream.
          */
-        public abstract void Compress(byte[] bytes, int off, int len, DataOutput output);
+        public abstract void Compress(sbyte[] bytes, int off, int len, DataOutput output); // input buffer is sbyte[] (signed bytes) as of this change; off/len presumably select the slice to compress — TODO confirm
 
     }
 }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/64c13f3c/src/core/Codecs/Compressing/GrowableByteArrayDataOutput.cs
----------------------------------------------------------------------
diff --git a/src/core/Codecs/Compressing/GrowableByteArrayDataOutput.cs b/src/core/Codecs/Compressing/GrowableByteArrayDataOutput.cs
index a0b8eba..d6b873d 100644
--- a/src/core/Codecs/Compressing/GrowableByteArrayDataOutput.cs
+++ b/src/core/Codecs/Compressing/GrowableByteArrayDataOutput.cs
@@ -25,13 +25,37 @@ namespace Lucene.Net.Codecs.Compressing
 {
     internal sealed class GrowableByteArrayDataOutput : DataOutput
     {
-        sbyte[] bytes;
-        int length;
+        private sbyte[] _bytes;
+        private int _length;
 
-        GrowableByteArrayDataOutput(int cp)
+        public GrowableByteArrayDataOutput(int cp) // Creates the output with a backing array sized by ArrayUtil.Oversize(cp, 1) and Length set to 0.
         {
-            this.bytes = new sbyte[ArrayUtil.Oversize(cp, 1)];
-            this.length = 0;
+            Bytes = new sbyte[ArrayUtil.Oversize(cp, 1)]; // cp is presumably the requested initial capacity — TODO confirm
+            Length = 0;
+        }
+
+        /// <summary>
+        /// Gets or sets the array held in the <c>_bytes</c> backing field. The setter stores the
+        /// given reference as-is, without copying.
+        /// </summary>
+        public sbyte[] Bytes
+        {
+            get { return _bytes; }
+            set { _bytes = value; }
+        }
+
+        /// <summary>
+        /// Gets or sets the value held in the <c>_length</c> backing field. The setter performs no
+        /// validation against the size of <c>Bytes</c>.
+        /// </summary>
+        public int Length
+        {
+            get { return _length; }
+            set { _length = value; }
         }
 
         public override void WriteByte(byte b)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/64c13f3c/src/core/Lucene.Net.csproj
----------------------------------------------------------------------
diff --git a/src/core/Lucene.Net.csproj b/src/core/Lucene.Net.csproj
index ce097bb..85f9818 100644
--- a/src/core/Lucene.Net.csproj
+++ b/src/core/Lucene.Net.csproj
@@ -187,7 +187,9 @@
     <Compile Include="Codecs\Codec.cs" />
     <Compile Include="Codecs\CodecUtil.cs" />
     <Compile Include="Codecs\Compressing\CompressingStoredFieldsFormat.cs" />
+    <Compile Include="Codecs\Compressing\CompressingStoredFieldsIndexReader.cs" />
     <Compile Include="Codecs\Compressing\CompressingStoredFieldsReader.cs" />
+    <Compile Include="Codecs\Compressing\CompressingStoredFieldsWriter.cs" />
     <Compile Include="Codecs\Compressing\CompressionMode.cs" />
     <Compile Include="Codecs\Compressing\Compressor.cs" />
     <Compile Include="Codecs\Compressing\Decompressor.cs" />


Mime
View raw message