From: mherndon@apache.org
To: commits@lucenenet.apache.org
Date: Tue, 24 Sep 2013 18:32:38 -0000
Subject: [02/50] [abbrv] git commit: another set of files

another set of files

Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/64c13f3c
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/64c13f3c
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/64c13f3c

Branch: refs/heads/branch_4x
Commit: 64c13f3c5cf452252c0e45ff88860ebca5da0174
Parents: 666d83e
Author: Mike Potts
Authored: Wed Jul 10 23:15:58 2013 -0400
Committer: Mike Potts
Committed: Wed Jul 10 23:15:58 2013 -0400

----------------------------------------------------------------------
 .../CompressingStoredFieldsIndexReader.cs       | 175 +++++
 .../CompressingStoredFieldsReader.cs            |   2 +-
 .../CompressingStoredFieldsWriter.cs            | 756 +++++++++----------
 src/core/Codecs/Compressing/Compressor.cs       |   2 +-
 .../Compressing/GrowableByteArrayDataOutput.cs  |  34 +-
 src/core/Lucene.Net.csproj                      |   2 +
 6 files changed, 575 insertions(+), 396 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/64c13f3c/src/core/Codecs/Compressing/CompressingStoredFieldsIndexReader.cs
----------------------------------------------------------------------
diff --git a/src/core/Codecs/Compressing/CompressingStoredFieldsIndexReader.cs b/src/core/Codecs/Compressing/CompressingStoredFieldsIndexReader.cs
new file mode 100644
index 0000000..f981b32
--- /dev/null
+++ b/src/core/Codecs/Compressing/CompressingStoredFieldsIndexReader.cs
@@ -0,0 +1,175 @@
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Packed;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Codecs.Compressing
+{
+ public sealed class CompressingStoredFieldsIndexReader: ICloneable //Closable??
+ { + int maxDoc; + int[] docBases; + long[] startPointers; + int[] avgChunkDocs; + long[] avgChunkSizes; + PackedInts.Reader[] docBasesDeltas; // delta from the avg + PackedInts.Reader[] startPointersDeltas; // delta from the avg + + IndexInput fieldsIndexIn; + + static long MoveLowOrderBitToSign(long n) + { + return ((Number.URShift(n, 1) ^ -(n & 1))); + } + + public CompressingStoredFieldsIndexReader(IndexInput fieldsIndexIn, SegmentInfo si) + { + this.fieldsIndexIn = fieldsIndexIn; + maxDoc = si.DocCount; + int[] docBases = new int[16]; + long[] startPointers = new long[16]; + int[] avgChunkDocs = new int[16]; + long[] avgChunkSizes = new long[16]; + PackedInts.Reader[] docBasesDeltas = new PackedInts.Reader[16]; + PackedInts.Reader[] startPointersDeltas = new PackedInts.Reader[16]; + + int packedIntsVersion = fieldsIndexIn.ReadVInt(); + + int blockCount = 0; + + for (;;) { + int numChunks = fieldsIndexIn.ReadVInt(); + if (numChunks == 0) { + break; + } + + if (blockCount == docBases.Length) { + int newSize = ArrayUtil.Oversize(blockCount + 1, 8); + docBases = Arrays.CopyOf(docBases, newSize); + startPointers = Arrays.CopyOf(startPointers, newSize); + avgChunkDocs = Arrays.CopyOf(avgChunkDocs, newSize); + avgChunkSizes = Arrays.CopyOf(avgChunkSizes, newSize); + docBasesDeltas = Arrays.CopyOf(docBasesDeltas, newSize); + startPointersDeltas = Arrays.CopyOf(startPointersDeltas, newSize); + } + + // doc bases + docBases[blockCount] = fieldsIndexIn.ReadVInt(); + avgChunkDocs[blockCount] = fieldsIndexIn.ReadVInt(); + int bitsPerDocBase = fieldsIndexIn.ReadVInt(); + if (bitsPerDocBase > 32) { + throw new CorruptIndexException("Corrupted"); + } + docBasesDeltas[blockCount] = (Lucene.Net.Util.Packed.PackedInts.Reader)PackedInts.GetReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerDocBase); + + // start pointers + startPointers[blockCount] = fieldsIndexIn.ReadVLong(); + avgChunkSizes[blockCount] = fieldsIndexIn.ReadVLong(); + int bitsPerStartPointer = fieldsIndexIn.ReadVInt(); + if (bitsPerStartPointer > 64) { + throw new CorruptIndexException("Corrupted"); + } + startPointersDeltas[blockCount] = (Lucene.Net.Util.Packed.PackedInts.Reader)PackedInts.GetReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerStartPointer); + + ++blockCount; + } + + this.docBases = Arrays.CopyOf(docBases, blockCount); + this.startPointers = Arrays.CopyOf(startPointers, blockCount); + this.avgChunkDocs = Arrays.CopyOf(avgChunkDocs, blockCount); + this.avgChunkSizes = Arrays.CopyOf(avgChunkSizes, blockCount); + this.docBasesDeltas = Arrays.CopyOf(docBasesDeltas, blockCount); + this.startPointersDeltas = Arrays.CopyOf(startPointersDeltas, blockCount); + } + + private CompressingStoredFieldsIndexReader(CompressingStoredFieldsIndexReader other) + { + this.fieldsIndexIn = null; + this.maxDoc = other.maxDoc; + this.docBases = other.docBases; + this.startPointers = other.startPointers; + this.avgChunkDocs = other.avgChunkDocs; + this.avgChunkSizes = other.avgChunkSizes; + this.docBasesDeltas = other.docBasesDeltas; + this.startPointersDeltas = other.startPointersDeltas; + } + + private int Block(int docID) + { + int lo = 0, hi = docBases.Length - 1; + while (lo <= hi) { + int mid = Number.URShift(lo + hi, 1); + int midValue = docBases[mid]; + if (midValue == docID) { + return mid; + } else if (midValue < docID) { + lo = mid + 1; + } else { + hi = mid - 1; + } + } + return hi; + } + + private int relativeDocBase(int block, int relativeChunk) + { 
+ int expected = avgChunkDocs[block] * relativeChunk; + long delta = MoveLowOrderBitToSign(docBasesDeltas[block].Get(relativeChunk)); + return expected + (int) delta; + } + + private long relativeStartPointer(int block, int relativeChunk) + { + long expected = avgChunkSizes[block] * relativeChunk; + long delta = MoveLowOrderBitToSign(startPointersDeltas[block].Get(relativeChunk)); + return expected + delta; + } + + private int relativeChunk(int block, int relativeDoc) + { + int lo = 0, hi = docBasesDeltas[block].Size() - 1; + while (lo <= hi) { + int mid = Number.URShift(lo + hi, 1); + int midValue = relativeDocBase(block, mid); + if (midValue == relativeDoc) { + return mid; + } else if (midValue < relativeDoc) { + lo = mid + 1; + } else { + hi = mid - 1; + } + } + return hi; + } + + private long getStartPointer(int docID) + { + if (docID < 0 || docID >= maxDoc) { + throw new ArgumentException("docID out of range [0-" + maxDoc + "]: " + docID); + } + int block = Block(docID); + int relativeChunk = this.relativeChunk(block, docID - docBases[block]); + return startPointers[block] + relativeStartPointer(block, relativeChunk); + } + + public override CompressingStoredFieldsIndexReader clone() + { + if (fieldsIndexIn == null) { + return this; + } else { + return new CompressingStoredFieldsIndexReader(this); + } + } + + public override void close() + { + IOUtils.Close(fieldsIndexIn); + } + + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/64c13f3c/src/core/Codecs/Compressing/CompressingStoredFieldsReader.cs ---------------------------------------------------------------------- diff --git a/src/core/Codecs/Compressing/CompressingStoredFieldsReader.cs b/src/core/Codecs/Compressing/CompressingStoredFieldsReader.cs index ad909ce..9c55e07 100644 --- a/src/core/Codecs/Compressing/CompressingStoredFieldsReader.cs +++ b/src/core/Codecs/Compressing/CompressingStoredFieldsReader.cs @@ -59,7 +59,7 @@ public sealed class CompressingStoredFieldsReader: StoredFieldsReader { string segment = si.name; bool success = false; fieldInfos = fn; - numDocs = si.getDocCount(); + numDocs = si.DocCount; IndexInput indexStream = null; try { fieldsStream = d.OpenInput(IndexFileNames.SegmentFileName(segment, segmentSuffix, FIELDS_EXTENSION), context); http://git-wip-us.apache.org/repos/asf/lucenenet/blob/64c13f3c/src/core/Codecs/Compressing/CompressingStoredFieldsWriter.cs ---------------------------------------------------------------------- diff --git a/src/core/Codecs/Compressing/CompressingStoredFieldsWriter.cs b/src/core/Codecs/Compressing/CompressingStoredFieldsWriter.cs index 54882fd..1543196 100644 --- a/src/core/Codecs/Compressing/CompressingStoredFieldsWriter.cs +++ b/src/core/Codecs/Compressing/CompressingStoredFieldsWriter.cs @@ -1,413 +1,391 @@ -package org.apache.lucene.codecs.compressing; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.FIELDS_EXTENSION; -import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION; - -import java.io.IOException; -import java.util.Arrays; - -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.StoredFieldsReader; -import org.apache.lucene.codecs.StoredFieldsWriter; -import org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader.ChunkIterator; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.AtomicReader; -import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FieldInfos; -import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.index.MergeState; -import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.index.SegmentReader; -import org.apache.lucene.store.DataOutput; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.packed.PackedInts; - -/** - * {@link StoredFieldsWriter} impl for {@link CompressingStoredFieldsFormat}. - * @lucene.experimental - */ -public final class CompressingStoredFieldsWriter extends StoredFieldsWriter { - - // hard limit on the maximum number of documents per chunk - static final int MAX_DOCUMENTS_PER_CHUNK = 128; - - static final int STRING = 0x00; - static final int BYTE_ARR = 0x01; - static final int NUMERIC_INT = 0x02; - static final int NUMERIC_FLOAT = 0x03; - static final int NUMERIC_LONG = 0x04; - static final int NUMERIC_DOUBLE = 0x05; - - static final int TYPE_BITS = PackedInts.bitsRequired(NUMERIC_DOUBLE); - static final int TYPE_MASK = (int) PackedInts.maxValue(TYPE_BITS); - - static final String CODEC_SFX_IDX = "Index"; - static final String CODEC_SFX_DAT = "Data"; - static final int VERSION_START = 0; - static final int VERSION_CURRENT = VERSION_START; - - private final Directory directory; - private final String segment; - private final String segmentSuffix; - private CompressingStoredFieldsIndexWriter indexWriter; - private IndexOutput fieldsStream; - - private final CompressionMode compressionMode; - private final Compressor compressor; - private final int chunkSize; - - private final GrowableByteArrayDataOutput bufferedDocs; - private int[] numStoredFields; // number of stored fields - private int[] endOffsets; // end offsets in bufferedDocs - private int docBase; // doc ID at the beginning of the chunk - private int numBufferedDocs; // docBase + numBufferedDocs == current doc ID - - /** Sole constructor. 
*/ - public CompressingStoredFieldsWriter(Directory directory, SegmentInfo si, String segmentSuffix, IOContext context, - String formatName, CompressionMode compressionMode, int chunkSize) throws IOException { - assert directory != null; - this.directory = directory; - this.segment = si.name; - this.segmentSuffix = segmentSuffix; - this.compressionMode = compressionMode; - this.compressor = compressionMode.newCompressor(); - this.chunkSize = chunkSize; - this.docBase = 0; - this.bufferedDocs = new GrowableByteArrayDataOutput(chunkSize); - this.numStoredFields = new int[16]; - this.endOffsets = new int[16]; - this.numBufferedDocs = 0; - - boolean success = false; - IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION), context); - try { - fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION), context); - - final String codecNameIdx = formatName + CODEC_SFX_IDX; - final String codecNameDat = formatName + CODEC_SFX_DAT; - CodecUtil.writeHeader(indexStream, codecNameIdx, VERSION_CURRENT); - CodecUtil.writeHeader(fieldsStream, codecNameDat, VERSION_CURRENT); - assert CodecUtil.headerLength(codecNameDat) == fieldsStream.getFilePointer(); - assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer(); - - indexWriter = new CompressingStoredFieldsIndexWriter(indexStream); - indexStream = null; - - fieldsStream.writeVInt(PackedInts.VERSION_CURRENT); - - success = true; - } finally { - if (!success) { - IOUtils.closeWhileHandlingException(indexStream); - abort(); - } - } - } - - @Override - public void close() throws IOException { - try { - IOUtils.close(fieldsStream, indexWriter); - } finally { - fieldsStream = null; - indexWriter = null; - } - } - - @Override - public void startDocument(int numStoredFields) throws IOException { - if (numBufferedDocs == this.numStoredFields.length) { - final int newLength = ArrayUtil.oversize(numBufferedDocs + 1, 4); - this.numStoredFields = Arrays.copyOf(this.numStoredFields, newLength); - endOffsets = Arrays.copyOf(endOffsets, newLength); - } - this.numStoredFields[numBufferedDocs] = numStoredFields; - ++numBufferedDocs; - } - - @Override - public void finishDocument() throws IOException { - endOffsets[numBufferedDocs - 1] = bufferedDocs.length; - if (triggerFlush()) { - flush(); - } - } - - private static void saveInts(int[] values, int length, DataOutput out) throws IOException { - assert length > 0; - if (length == 1) { - out.writeVInt(values[0]); - } else { - boolean allEqual = true; - for (int i = 1; i < length; ++i) { - if (values[i] != values[0]) { - allEqual = false; - break; - } - } - if (allEqual) { - out.writeVInt(0); - out.writeVInt(values[0]); - } else { - long max = 0; - for (int i = 0; i < length; ++i) { - max |= values[i]; +using Lucene.Net.Documents; +using Lucene.Net.Index; +using Lucene.Net.Store; +using Lucene.Net.Support; +using Lucene.Net.Util; +using Lucene.Net.Util.Packed; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; + +namespace Lucene.Net.Codecs.Compressing +{ + public sealed class CompressingStoredFieldsWriter : StoredFieldsWriter + { + static readonly int MAX_DOCUMENTS_PER_CHUNK = 128; + static readonly int STRING = 0x00; + static readonly int BYTE_ARR = 0x01; + static readonly int NUMERIC_INT = 0x02; + static readonly int NUMERIC_FLOAT = 0x03; + static readonly int NUMERIC_LONG = 0x04; + static readonly int NUMERIC_DOUBLE = 0x05; + + static 
readonly int TYPE_BITS = PackedInts.bitsRequired(NUMERIC_DOUBLE); + static readonly int TYPE_MASK = (int)PackedInts.maxValue(TYPE_BITS); + + static readonly String CODEC_SFX_IDX = "Index"; + static readonly String CODEC_SFX_DAT = "Data"; + static readonly int VERSION_START = 0; + static readonly int VERSION_CURRENT = VERSION_START; + + private Directory directory; + private string segment; + private string segmentSuffix; + private CompressingStoredFieldsIndexWriter indexWriter; + private IndexOutput fieldsStream; + + private CompressionMode compressionMode; + private Compressor compressor; + private int chunkSize; + + private GrowableByteArrayDataOutput bufferedDocs; + private int[] numStoredFields; // number of stored fields + private int[] endOffsets; // end offsets in bufferedDocs + private int docBase; // doc ID at the beginning of the chunk + private int numBufferedDocs; // docBase + numBufferedDocs == current doc ID + + public CompressingStoredFieldsWriter(Directory directory, SegmentInfo si, string segmentSuffix, IOContext context, string formatName, CompressionMode compressionMode, int chunkSize) + { + this.directory = directory; + this.segment = si.name; + this.segmentSuffix = segmentSuffix; + this.compressionMode = compressionMode; + this.compressor = compressionMode.newCompressor(); + this.chunkSize = chunkSize; + this.docBase = 0; + this.bufferedDocs = new GrowableByteArrayDataOutput(chunkSize); + this.numStoredFields = new int[16]; + this.endOffsets = new int[16]; + this.numBufferedDocs = 0; + + bool success = false; + IndexOutput indexStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION), context); + try + { + fieldsStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, segmentSuffix, FIELDS_EXTENSION), context); + + string codecNameIdx = formatName + CODEC_SFX_IDX; + string codecNameDat = formatName + CODEC_SFX_DAT; + CodecUtil.WriteHeader(indexStream, codecNameIdx, VERSION_CURRENT); + CodecUtil.WriteHeader(fieldsStream, codecNameDat, VERSION_CURRENT); + + indexWriter = new CompressingStoredFieldsIndexWriter(indexStream); + indexStream = null; + + fieldsStream.WriteVInt(PackedInts.VERSION_CURRENT); + + success = true; + } + finally + { + if (!success) { + IOUtils.CloseWhileHandlingException(indexStream); + abort(); + } + } } - final int bitsRequired = PackedInts.bitsRequired(max); - out.writeVInt(bitsRequired); - final PackedInts.Writer w = PackedInts.getWriterNoHeader(out, PackedInts.Format.PACKED, length, bitsRequired, 1); - for (int i = 0; i < length; ++i) { - w.add(values[i]); + + public override void Close() + { + try + { + IOUtils.Close(fieldsStream, indexWriter); + } + finally + { + fieldsStream = null; + indexWriter = null; + } } - w.finish(); - } - } - } - private void writeHeader(int docBase, int numBufferedDocs, int[] numStoredFields, int[] lengths) throws IOException { - // save docBase and numBufferedDocs - fieldsStream.writeVInt(docBase); - fieldsStream.writeVInt(numBufferedDocs); + public override void StartDocument(int numStoredFields) + { + if (numBufferedDocs == this.numStoredFields.Length) + { + int newLength = ArrayUtil.Oversize(numBufferedDocs + 1, 4); + this.numStoredFields = Arrays.CopyOf(this.numStoredFields, newLength); + endOffsets = Arrays.CopyOf(endOffsets, newLength); + } + this.numStoredFields[numBufferedDocs] = numStoredFields; + ++numBufferedDocs; + } - // save numStoredFields - saveInts(numStoredFields, numBufferedDocs, fieldsStream); + public override void 
FinishDocument() + { + endOffsets[numBufferedDocs - 1] = bufferedDocs.Length; + if (TriggerFlush()) + { + Flush(); + } + } - // save lengths - saveInts(lengths, numBufferedDocs, fieldsStream); - } + private static void saveInts(int[] values, int length, DataOutput output) + { + if (length == 1) + { + output.WriteVInt(values[0]); + } + else + { + bool allEqual = true; + for (int i = 1; i < length; ++i) { + if (values[i] != values[0]) { + allEqual = false; + //break; + } + } + if (allEqual) { + output.WriteVInt(0); + output.WriteVInt(values[0]); + } + else + { + long max = 0; + for (int i = 0; i < length; ++i) { + max |= values[i]; + } + int bitsRequired = PackedInts.BitsRequired(max); + output.WriteVInt(bitsRequired); + PackedInts.Writer w = PackedInts.GetWriterNoHeader(output, PackedInts.Format.PACKED, length, bitsRequired, 1); + for (int i = 0; i < length; ++i) { + w.Add(values[i]); + } + w.Finish(); + } + } + } - private boolean triggerFlush() { - return bufferedDocs.length >= chunkSize || // chunks of at least chunkSize bytes - numBufferedDocs >= MAX_DOCUMENTS_PER_CHUNK; - } + private void WriteHeader(int docBase, int numBufferedDocs, int[] numStoredFields, int[] lengths) + { + // save docBase and numBufferedDocs + fieldsStream.WriteVInt(docBase); + fieldsStream.WriteVInt(numBufferedDocs); - private void flush() throws IOException { - indexWriter.writeIndex(numBufferedDocs, fieldsStream.getFilePointer()); + // save numStoredFields + saveInts(numStoredFields, numBufferedDocs, fieldsStream); - // transform end offsets into lengths - final int[] lengths = endOffsets; - for (int i = numBufferedDocs - 1; i > 0; --i) { - lengths[i] = endOffsets[i] - endOffsets[i - 1]; - assert lengths[i] >= 0; - } - writeHeader(docBase, numBufferedDocs, numStoredFields, lengths); - - // compress stored fields to fieldsStream - compressor.compress(bufferedDocs.bytes, 0, bufferedDocs.length, fieldsStream); - - // reset - docBase += numBufferedDocs; - numBufferedDocs = 0; - bufferedDocs.length = 0; - } - - @Override - public void writeField(FieldInfo info, IndexableField field) - throws IOException { - int bits = 0; - final BytesRef bytes; - final String string; - - Number number = field.numericValue(); - if (number != null) { - if (number instanceof Byte || number instanceof Short || number instanceof Integer) { - bits = NUMERIC_INT; - } else if (number instanceof Long) { - bits = NUMERIC_LONG; - } else if (number instanceof Float) { - bits = NUMERIC_FLOAT; - } else if (number instanceof Double) { - bits = NUMERIC_DOUBLE; - } else { - throw new IllegalArgumentException("cannot store numeric type " + number.getClass()); - } - string = null; - bytes = null; - } else { - bytes = field.binaryValue(); - if (bytes != null) { - bits = BYTE_ARR; - string = null; - } else { - bits = STRING; - string = field.stringValue(); - if (string == null) { - throw new IllegalArgumentException("field " + field.name() + " is stored but does not have binaryValue, stringValue nor numericValue"); + // save lengths + saveInts(lengths, numBufferedDocs, fieldsStream); } - } - } - final long infoAndBits = (((long) info.number) << TYPE_BITS) | bits; - bufferedDocs.writeVLong(infoAndBits); - - if (bytes != null) { - bufferedDocs.writeVInt(bytes.length); - bufferedDocs.writeBytes(bytes.bytes, bytes.offset, bytes.length); - } else if (string != null) { - bufferedDocs.writeString(field.stringValue()); - } else { - if (number instanceof Byte || number instanceof Short || number instanceof Integer) { - bufferedDocs.writeInt(number.intValue()); - 
} else if (number instanceof Long) { - bufferedDocs.writeLong(number.longValue()); - } else if (number instanceof Float) { - bufferedDocs.writeInt(Float.floatToIntBits(number.floatValue())); - } else if (number instanceof Double) { - bufferedDocs.writeLong(Double.doubleToLongBits(number.doubleValue())); - } else { - throw new AssertionError("Cannot get here"); - } - } - } - - @Override - public void abort() { - IOUtils.closeWhileHandlingException(this); - IOUtils.deleteFilesIgnoringExceptions(directory, - IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION), - IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION)); - } - - @Override - public void finish(FieldInfos fis, int numDocs) throws IOException { - if (numBufferedDocs > 0) { - flush(); - } else { - assert bufferedDocs.length == 0; - } - if (docBase != numDocs) { - throw new RuntimeException("Wrote " + docBase + " docs, finish called with numDocs=" + numDocs); - } - indexWriter.finish(numDocs); - assert bufferedDocs.length == 0; - } - - @Override - public int merge(MergeState mergeState) throws IOException { - int docCount = 0; - int idx = 0; - - for (AtomicReader reader : mergeState.readers) { - final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++]; - CompressingStoredFieldsReader matchingFieldsReader = null; - if (matchingSegmentReader != null) { - final StoredFieldsReader fieldsReader = matchingSegmentReader.getFieldsReader(); - // we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader - if (fieldsReader != null && fieldsReader instanceof CompressingStoredFieldsReader) { - matchingFieldsReader = (CompressingStoredFieldsReader) fieldsReader; + private bool TriggerFlush() + { + return bufferedDocs.Length >= chunkSize || // chunks of at least chunkSize bytes + numBufferedDocs >= MAX_DOCUMENTS_PER_CHUNK; } - } - - final int maxDoc = reader.maxDoc(); - final Bits liveDocs = reader.getLiveDocs(); - - if (matchingFieldsReader == null) { - // naive merge... 
- for (int i = nextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = nextLiveDoc(i + 1, liveDocs, maxDoc)) { - Document doc = reader.document(i); - addDocument(doc, mergeState.fieldInfos); - ++docCount; - mergeState.checkAbort.work(300); + + private void Flush() + { + indexWriter.WriteIndex(numBufferedDocs, fieldsStream.FilePointer); + + // transform end offsets into lengths + int[] lengths = endOffsets; + for (int i = numBufferedDocs - 1; i > 0; --i) + { + lengths[i] = endOffsets[i] - endOffsets[i - 1]; + } + + WriteHeader(docBase, numBufferedDocs, numStoredFields, lengths); + + // compress stored fields to fieldsStream + compressor.Compress(bufferedDocs.Bytes, 0, bufferedDocs.Length, fieldsStream); + + // reset + docBase += numBufferedDocs; + numBufferedDocs = 0; + bufferedDocs.Length = 0; } - } else { - int docID = nextLiveDoc(0, liveDocs, maxDoc); - if (docID < maxDoc) { - // not all docs were deleted - final ChunkIterator it = matchingFieldsReader.chunkIterator(docID); - int[] startOffsets = new int[0]; - do { - // go to the next chunk that contains docID - it.next(docID); - // transform lengths into offsets - if (startOffsets.length < it.chunkDocs) { - startOffsets = new int[ArrayUtil.oversize(it.chunkDocs, 4)]; + + public override void writeField(FieldInfo info, IndexableField field) + { + int bits = 0; + BytesRef bytes; + string str; + + Number number = field.numericValue(); + if (number != null) { + if (number instanceof Byte || number instanceof Short || number instanceof Integer) { + bits = NUMERIC_INT; + } else if (number instanceof Long) { + bits = NUMERIC_LONG; + } else if (number instanceof Float) { + bits = NUMERIC_FLOAT; + } else if (number instanceof Double) { + bits = NUMERIC_DOUBLE; + } else { + throw new IllegalArgumentException("cannot store numeric type " + number.getClass()); + } + str = null; + bytes = null; + } else { + bytes = field.binaryValue(); + if (bytes != null) { + bits = BYTE_ARR; + str = null; + } else { + bits = STRING; + str = field.stringValue(); + if (str == null) { + throw new ArgumentException("field " + field.name() + " is stored but does not have binaryValue, stringValue nor numericValue"); + } } - for (int i = 1; i < it.chunkDocs; ++i) { - startOffsets[i] = startOffsets[i - 1] + it.lengths[i - 1]; + } + + long infoAndBits = (((long) info.number) << TYPE_BITS) | bits; + bufferedDocs.WriteVLong(infoAndBits); + + if (bytes != null) { + bufferedDocs.WriteVInt(bytes.length); + bufferedDocs.WriteBytes(bytes.bytes, bytes.offset, bytes.length); + } else if (str != null) { + bufferedDocs.WriteString(field.stringValue()); + } else { + if (number instanceof Byte || number instanceof Short || number instanceof Integer) { + bufferedDocs.writeInt(number.intValue()); + } else if (number instanceof Long) { + bufferedDocs.writeLong(number.longValue()); + } else if (number instanceof Float) { + bufferedDocs.writeInt(Float.floatToIntBits(number.floatValue())); + } else if (number instanceof Double) { + bufferedDocs.writeLong(Double.doubleToLongBits(number.doubleValue())); + } else { + throw new AssertionError("Cannot get here"); } + } + } - if (compressionMode == matchingFieldsReader.getCompressionMode() // same compression mode - && numBufferedDocs == 0 // starting a new chunk - && startOffsets[it.chunkDocs - 1] < chunkSize // chunk is small enough - && startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] >= chunkSize // chunk is large enough - && nextDeletedDoc(it.docBase, liveDocs, it.docBase + it.chunkDocs) == it.docBase + it.chunkDocs) { // no deletion 
in the chunk - assert docID == it.docBase; - - // no need to decompress, just copy data - indexWriter.writeIndex(it.chunkDocs, fieldsStream.getFilePointer()); - writeHeader(this.docBase, it.chunkDocs, it.numStoredFields, it.lengths); - it.copyCompressedData(fieldsStream); - this.docBase += it.chunkDocs; - docID = nextLiveDoc(it.docBase + it.chunkDocs, liveDocs, maxDoc); - docCount += it.chunkDocs; - mergeState.checkAbort.work(300 * it.chunkDocs); - } else { - // decompress - it.decompress(); - if (startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] != it.bytes.length) { - throw new CorruptIndexException("Corrupted: expected chunk size=" + startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] + ", got " + it.bytes.length); + public override void Abort() { + IOUtils.CloseWhileHandlingException(this); + IOUtils.DeleteFilesIgnoringExceptions(directory, + IndexFileNames.SegmentFileName(segment, segmentSuffix, FIELDS_EXTENSION), + IndexFileNames.SegmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION)); + } + + public override void finish(FieldInfos fis, int numDocs) + { + if (numBufferedDocs > 0) { + Flush(); + } else { + //assert bufferedDocs.length == 0; + } + if (docBase != numDocs) { + throw new RuntimeException("Wrote " + docBase + " docs, finish called with numDocs=" + numDocs); + } + indexWriter.finish(numDocs); + } + + public override int Merge(MergeState mergeState) + { + int docCount = 0; + int idx = 0; + + foreach (AtomicReader reader in mergeState.readers) + { + SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++]; + CompressingStoredFieldsReader matchingFieldsReader = null; + if (matchingSegmentReader != null) + { + StoredFieldsReader fieldsReader = matchingSegmentReader.FieldsReader; + // we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader + if (fieldsReader != null && fieldsReader is CompressingStoredFieldsReader) + { + matchingFieldsReader = (CompressingStoredFieldsReader) fieldsReader; } - // copy non-deleted docs - for (; docID < it.docBase + it.chunkDocs; docID = nextLiveDoc(docID + 1, liveDocs, maxDoc)) { - final int diff = docID - it.docBase; - startDocument(it.numStoredFields[diff]); - bufferedDocs.writeBytes(it.bytes.bytes, it.bytes.offset + startOffsets[diff], it.lengths[diff]); - finishDocument(); + } + + int maxDoc = reader.MaxDoc; + IBits liveDocs = reader.LiveDocs; + + if (matchingFieldsReader == null) { + // naive merge... 
+ for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc)) { + Document doc = reader.Document(i); + AddDocument(doc, mergeState.fieldInfos); ++docCount; - mergeState.checkAbort.work(300); + mergeState.checkAbort.Work(300); + } + } else { + int docID = NextLiveDoc(0, liveDocs, maxDoc); + if (docID < maxDoc) { + // not all docs were deleted + ChunkIterator it = matchingFieldsReader.ChunkIterator(docID); + int[] startOffsets = new int[0]; + do { + // go to the next chunk that contains docID + it.next(docID); + // transform lengths into offsets + if (startOffsets.Length < it.chunkDocs) { + startOffsets = new int[ArrayUtil.Oversize(it.chunkDocs, 4)]; + } + for (int i = 1; i < it.chunkDocs; ++i) { + startOffsets[i] = startOffsets[i - 1] + it.lengths[i - 1]; + } + + if (compressionMode == matchingFieldsReader.getCompressionMode() // same compression mode + && numBufferedDocs == 0 // starting a new chunk + && startOffsets[it.chunkDocs - 1] < chunkSize // chunk is small enough + && startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] >= chunkSize // chunk is large enough + && nextDeletedDoc(it.docBase, liveDocs, it.docBase + it.chunkDocs) == it.docBase + it.chunkDocs) { // no deletion in the chunk + + // no need to decompress, just copy data + indexWriter.writeIndex(it.chunkDocs, fieldsStream.FilePointer); + WriteHeader(this.docBase, it.chunkDocs, it.numStoredFields, it.lengths); + it.copyCompressedData(fieldsStream); + this.docBase += it.chunkDocs; + docID = NextLiveDoc(it.docBase + it.chunkDocs, liveDocs, maxDoc); + docCount += it.chunkDocs; + mergeState.checkAbort.Work(300 * it.chunkDocs); + } else { + // decompress + it.decompress(); + if (startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] != it.bytes.length) { + throw new CorruptIndexException("Corrupted: expected chunk size=" + startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] + ", got " + it.bytes.length); + } + // copy non-deleted docs + for (; docID < it.docBase + it.chunkDocs; docID = NextLiveDoc(docID + 1, liveDocs, maxDoc)) { + int diff = docID - it.docBase; + StartDocument(it.numStoredFields[diff]); + bufferedDocs.WriteBytes(it.bytes.bytes, it.bytes.offset + startOffsets[diff], it.lengths[diff]); + FinishDocument(); + ++docCount; + mergeState.checkAbort.Work(300); + } + } + } while (docID < maxDoc); } } - } while (docID < maxDoc); + } + + Finish(mergeState.fieldInfos, docCount); + return docCount; } - } - } - finish(mergeState.fieldInfos, docCount); - return docCount; - } - private static int nextLiveDoc(int doc, Bits liveDocs, int maxDoc) { - if (liveDocs == null) { - return doc; - } - while (doc < maxDoc && !liveDocs.get(doc)) { - ++doc; - } - return doc; - } + private static int NextLiveDoc(int doc, IBits liveDocs, int maxDoc) + { + if (liveDocs == null) + { + return doc; + } + while (doc < maxDoc && !liveDocs[doc]) + { + ++doc; + } + return doc; + } - private static int nextDeletedDoc(int doc, Bits liveDocs, int maxDoc) { - if (liveDocs == null) { - return maxDoc; - } - while (doc < maxDoc && liveDocs.get(doc)) { - ++doc; - } - return doc; - } + private static int nextDeletedDoc(int doc, Bits liveDocs, int maxDoc) + { + if (liveDocs == null) + { + return maxDoc; + } + while (doc < maxDoc && liveDocs[doc]) + { + ++doc; + } + return doc; + } + } } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/64c13f3c/src/core/Codecs/Compressing/Compressor.cs ---------------------------------------------------------------------- diff --git 
a/src/core/Codecs/Compressing/Compressor.cs b/src/core/Codecs/Compressing/Compressor.cs index 48fdb74..00c0053 100644 --- a/src/core/Codecs/Compressing/Compressor.cs +++ b/src/core/Codecs/Compressing/Compressor.cs @@ -32,7 +32,7 @@ namespace Lucene.Net.Codecs.Compressing * compressor to add all necessary information so that a {@link Decompressor} * will know when to stop decompressing bytes from the stream. */ - public abstract void Compress(byte[] bytes, int off, int len, DataOutput output); + public abstract void Compress(sbyte[] bytes, int off, int len, DataOutput output); } } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/64c13f3c/src/core/Codecs/Compressing/GrowableByteArrayDataOutput.cs ---------------------------------------------------------------------- diff --git a/src/core/Codecs/Compressing/GrowableByteArrayDataOutput.cs b/src/core/Codecs/Compressing/GrowableByteArrayDataOutput.cs index a0b8eba..d6b873d 100644 --- a/src/core/Codecs/Compressing/GrowableByteArrayDataOutput.cs +++ b/src/core/Codecs/Compressing/GrowableByteArrayDataOutput.cs @@ -25,13 +25,37 @@ namespace Lucene.Net.Codecs.Compressing { internal sealed class GrowableByteArrayDataOutput : DataOutput { - sbyte[] bytes; - int length; + private sbyte[] _bytes; + private int _length; - GrowableByteArrayDataOutput(int cp) + public GrowableByteArrayDataOutput(int cp) { - this.bytes = new sbyte[ArrayUtil.Oversize(cp, 1)]; - this.length = 0; + Bytes = new sbyte[ArrayUtil.Oversize(cp, 1)]; + Length = 0; + } + + public sbyte[] Bytes + { + get + { + return _bytes; + } + set + { + _bytes = value; + } + } + + public int Length + { + get + { + return _length; + } + set + { + _length = value; + } } public override void WriteByte(byte b) http://git-wip-us.apache.org/repos/asf/lucenenet/blob/64c13f3c/src/core/Lucene.Net.csproj ---------------------------------------------------------------------- diff --git a/src/core/Lucene.Net.csproj b/src/core/Lucene.Net.csproj index ce097bb..85f9818 100644 --- a/src/core/Lucene.Net.csproj +++ b/src/core/Lucene.Net.csproj @@ -187,7 +187,9 @@ + +
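
----------------------------------------------------------------------

Note on the new index reader above: CompressingStoredFieldsIndexReader keeps, for each block of chunks, an average chunk size and average doc count plus a packed array of per-chunk deltas, and reconstructs an absolute value as average * relativeChunk + delta (see relativeDocBase, relativeStartPointer and getStartPointer in the diff). The stand-alone sketch below only illustrates that arithmetic; the class name and numbers are invented for illustration and it is not Lucene.Net API.

using System;

// Hypothetical illustration of the "average + delta" scheme used by
// CompressingStoredFieldsIndexReader (relativeStartPointer/getStartPointer).
static class ChunkIndexDemo
{
    static void Main()
    {
        long blockStartPointer = 1000000;     // file offset of the block's first chunk
        long avgChunkSize = 16384;            // average compressed chunk size stored once per block
        long[] deltas = { 0, 120, -48, 310 }; // per-chunk deviation from the expected offset

        for (int relativeChunk = 0; relativeChunk < deltas.Length; relativeChunk++)
        {
            long expected = avgChunkSize * relativeChunk;     // expected offset inside the block
            long relative = expected + deltas[relativeChunk]; // corrected by the stored delta
            long startPointer = blockStartPointer + relative; // absolute pointer, as in getStartPointer
            Console.WriteLine("chunk " + relativeChunk + " starts at " + startPointer);
        }
    }
}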
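Those deltas are stored zig-zag encoded so that small positive and negative values both fit in few bits; MoveLowOrderBitToSign in the new reader is the decoding half. A minimal round-trip sketch in plain C# follows; the Encode helper is not part of this commit and is shown only to complete the round trip.

using System;

static class ZigZagDemo
{
    // Decoding as in CompressingStoredFieldsIndexReader.MoveLowOrderBitToSign:
    // the low bit carries the sign, the remaining bits carry the magnitude.
    static long Decode(long n)
    {
        return (long)((ulong)n >> 1) ^ -(n & 1);
    }

    // Matching encoder, included only for illustration (not part of the commit).
    static long Encode(long n)
    {
        return (n << 1) ^ (n >> 63);
    }

    static void Main()
    {
        foreach (long value in new long[] { 0, 1, -1, 2, -2, 123456789, -123456789 })
        {
            Console.WriteLine(value + " -> " + Encode(value) + " -> " + Decode(Encode(value)));
        }
    }
}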