Return-Path: X-Original-To: apmail-lucenenet-commits-archive@www.apache.org Delivered-To: apmail-lucenenet-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 9EE12F079 for ; Wed, 3 Apr 2013 17:40:12 +0000 (UTC) Received: (qmail 20828 invoked by uid 500); 3 Apr 2013 17:39:53 -0000 Delivered-To: apmail-lucenenet-commits-archive@lucenenet.apache.org Received: (qmail 20469 invoked by uid 500); 3 Apr 2013 17:39:53 -0000 Mailing-List: contact commits-help@lucenenet.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: lucene-net-dev@lucenenet.apache.org Delivered-To: mailing list commits@lucenenet.apache.org Received: (qmail 19468 invoked by uid 99); 3 Apr 2013 17:39:45 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 03 Apr 2013 17:39:45 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id A6878836ED2; Wed, 3 Apr 2013 17:39:44 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: ccurrens@apache.org To: commits@lucenenet.apache.org Date: Wed, 03 Apr 2013 17:39:51 -0000 Message-Id: In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [08/51] [partial] Mass convert mixed tabs to spaces http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/IntBlockPool.cs ---------------------------------------------------------------------- diff --git a/src/core/Index/IntBlockPool.cs b/src/core/Index/IntBlockPool.cs index 5fbee30..5b589e5 100644 --- a/src/core/Index/IntBlockPool.cs +++ b/src/core/Index/IntBlockPool.cs @@ -19,61 +19,61 @@ using System; namespace Lucene.Net.Index { - - sealed class IntBlockPool - { - private void InitBlock() - { - intUpto = DocumentsWriter.INT_BLOCK_SIZE; - } - - public int[][] buffers = new int[10][]; - - internal int bufferUpto = - 1; // Which buffer we are upto - public int intUpto; // Where we are in head buffer - - public int[] buffer; // Current head buffer - public int intOffset = - DocumentsWriter.INT_BLOCK_SIZE; // Current head offset - - private DocumentsWriter docWriter; - internal bool trackAllocations; - - public IntBlockPool(DocumentsWriter docWriter, bool trackAllocations) - { - InitBlock(); - this.docWriter = docWriter; - this.trackAllocations = trackAllocations; - } - - public void Reset() - { - if (bufferUpto != - 1) - { - if (bufferUpto > 0) - // Recycle all but the first buffer - docWriter.RecycleIntBlocks(buffers, 1, 1 + bufferUpto); - - // Reuse first buffer - bufferUpto = 0; - intUpto = 0; - intOffset = 0; - buffer = buffers[0]; - } - } - - public void NextBuffer() - { - if (1 + bufferUpto == buffers.Length) - { - int[][] newBuffers = new int[(int) (buffers.Length * 1.5)][]; - Array.Copy(buffers, 0, newBuffers, 0, buffers.Length); - buffers = newBuffers; - } - buffer = buffers[1 + bufferUpto] = docWriter.GetIntBlock(trackAllocations); - bufferUpto++; - - intUpto = 0; - intOffset += DocumentsWriter.INT_BLOCK_SIZE; - } - } + + sealed class IntBlockPool + { + private void InitBlock() + { + intUpto = DocumentsWriter.INT_BLOCK_SIZE; + } + + public int[][] buffers = new int[10][]; + + internal int bufferUpto = - 1; // Which buffer we are upto + public int intUpto; // Where we are in head buffer + + public int[] buffer; // Current head buffer + public int intOffset = - DocumentsWriter.INT_BLOCK_SIZE; // Current head offset + + private 
DocumentsWriter docWriter; + internal bool trackAllocations; + + public IntBlockPool(DocumentsWriter docWriter, bool trackAllocations) + { + InitBlock(); + this.docWriter = docWriter; + this.trackAllocations = trackAllocations; + } + + public void Reset() + { + if (bufferUpto != - 1) + { + if (bufferUpto > 0) + // Recycle all but the first buffer + docWriter.RecycleIntBlocks(buffers, 1, 1 + bufferUpto); + + // Reuse first buffer + bufferUpto = 0; + intUpto = 0; + intOffset = 0; + buffer = buffers[0]; + } + } + + public void NextBuffer() + { + if (1 + bufferUpto == buffers.Length) + { + int[][] newBuffers = new int[(int) (buffers.Length * 1.5)][]; + Array.Copy(buffers, 0, newBuffers, 0, buffers.Length); + buffers = newBuffers; + } + buffer = buffers[1 + bufferUpto] = docWriter.GetIntBlock(trackAllocations); + bufferUpto++; + + intUpto = 0; + intOffset += DocumentsWriter.INT_BLOCK_SIZE; + } + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/InvertedDocConsumer.cs ---------------------------------------------------------------------- diff --git a/src/core/Index/InvertedDocConsumer.cs b/src/core/Index/InvertedDocConsumer.cs index bb9b2f8..2be2a70 100644 --- a/src/core/Index/InvertedDocConsumer.cs +++ b/src/core/Index/InvertedDocConsumer.cs @@ -20,34 +20,34 @@ using System.Collections.Generic; namespace Lucene.Net.Index { - - abstract class InvertedDocConsumer - { - - /// Add a new thread - internal abstract InvertedDocConsumerPerThread AddThread(DocInverterPerThread docInverterPerThread); - - /// Abort (called after hitting AbortException) - public abstract void Abort(); + + abstract class InvertedDocConsumer + { + + /// Add a new thread + internal abstract InvertedDocConsumerPerThread AddThread(DocInverterPerThread docInverterPerThread); + + /// Abort (called after hitting AbortException) + public abstract void Abort(); - /// Flush a new segment - internal abstract void Flush( - IDictionary> threadsAndFields, - SegmentWriteState state); - - /// Close doc stores - internal abstract void CloseDocStore(SegmentWriteState state); - - /// Attempt to free RAM, returning true if any RAM was - /// freed - /// - public abstract bool FreeRAM(); - - internal FieldInfos fieldInfos; - - internal virtual void SetFieldInfos(FieldInfos fieldInfos) - { - this.fieldInfos = fieldInfos; - } - } + /// Flush a new segment + internal abstract void Flush( + IDictionary> threadsAndFields, + SegmentWriteState state); + + /// Close doc stores + internal abstract void CloseDocStore(SegmentWriteState state); + + /// Attempt to free RAM, returning true if any RAM was + /// freed + /// + public abstract bool FreeRAM(); + + internal FieldInfos fieldInfos; + + internal virtual void SetFieldInfos(FieldInfos fieldInfos) + { + this.fieldInfos = fieldInfos; + } + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/InvertedDocConsumerPerField.cs ---------------------------------------------------------------------- diff --git a/src/core/Index/InvertedDocConsumerPerField.cs b/src/core/Index/InvertedDocConsumerPerField.cs index 471d9b7..200afed 100644 --- a/src/core/Index/InvertedDocConsumerPerField.cs +++ b/src/core/Index/InvertedDocConsumerPerField.cs @@ -20,27 +20,27 @@ using Lucene.Net.Documents; namespace Lucene.Net.Index { - - abstract class InvertedDocConsumerPerField - { - - // Called once per field, and is given all Fieldable - // occurrences for this field in the document. 
Return - // true if you wish to see inverted tokens for these - // fields: - internal abstract bool Start(IFieldable[] fields, int count); - - // Called before a field instance is being processed - internal abstract void Start(IFieldable field); - - // Called once per inverted token - internal abstract void Add(); - - // Called once per field per document, after all Fieldable - // occurrences are inverted - internal abstract void Finish(); - - // Called on hitting an aborting exception - public abstract void Abort(); - } + + abstract class InvertedDocConsumerPerField + { + + // Called once per field, and is given all Fieldable + // occurrences for this field in the document. Return + // true if you wish to see inverted tokens for these + // fields: + internal abstract bool Start(IFieldable[] fields, int count); + + // Called before a field instance is being processed + internal abstract void Start(IFieldable field); + + // Called once per inverted token + internal abstract void Add(); + + // Called once per field per document, after all Fieldable + // occurrences are inverted + internal abstract void Finish(); + + // Called on hitting an aborting exception + public abstract void Abort(); + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/InvertedDocConsumerPerThread.cs ---------------------------------------------------------------------- diff --git a/src/core/Index/InvertedDocConsumerPerThread.cs b/src/core/Index/InvertedDocConsumerPerThread.cs index 49ed8df..5e0b6d1 100644 --- a/src/core/Index/InvertedDocConsumerPerThread.cs +++ b/src/core/Index/InvertedDocConsumerPerThread.cs @@ -19,12 +19,12 @@ using System; namespace Lucene.Net.Index { - - abstract class InvertedDocConsumerPerThread - { - public abstract void StartDocument(); - internal abstract InvertedDocConsumerPerField AddField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo); - public abstract DocumentsWriter.DocWriter FinishDocument(); - public abstract void Abort(); - } + + abstract class InvertedDocConsumerPerThread + { + public abstract void StartDocument(); + internal abstract InvertedDocConsumerPerField AddField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo); + public abstract DocumentsWriter.DocWriter FinishDocument(); + public abstract void Abort(); + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/InvertedDocEndConsumer.cs ---------------------------------------------------------------------- diff --git a/src/core/Index/InvertedDocEndConsumer.cs b/src/core/Index/InvertedDocEndConsumer.cs index fb0a69e..f9e9548 100644 --- a/src/core/Index/InvertedDocEndConsumer.cs +++ b/src/core/Index/InvertedDocEndConsumer.cs @@ -20,13 +20,13 @@ using System.Collections.Generic; namespace Lucene.Net.Index { - - abstract class InvertedDocEndConsumer - { - public abstract InvertedDocEndConsumerPerThread AddThread(DocInverterPerThread docInverterPerThread); + + abstract class InvertedDocEndConsumer + { + public abstract InvertedDocEndConsumerPerThread AddThread(DocInverterPerThread docInverterPerThread); public abstract void Flush(IDictionary> threadsAndFields, SegmentWriteState state); - internal abstract void CloseDocStore(SegmentWriteState state); - public abstract void Abort(); - internal abstract void SetFieldInfos(FieldInfos fieldInfos); - } + internal abstract void CloseDocStore(SegmentWriteState state); + public abstract void Abort(); + internal abstract void SetFieldInfos(FieldInfos 
fieldInfos); + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/InvertedDocEndConsumerPerField.cs ---------------------------------------------------------------------- diff --git a/src/core/Index/InvertedDocEndConsumerPerField.cs b/src/core/Index/InvertedDocEndConsumerPerField.cs index dfad1c9..2e82ad4 100644 --- a/src/core/Index/InvertedDocEndConsumerPerField.cs +++ b/src/core/Index/InvertedDocEndConsumerPerField.cs @@ -19,10 +19,10 @@ using System; namespace Lucene.Net.Index { - - abstract class InvertedDocEndConsumerPerField - { - internal abstract void Finish(); - internal abstract void Abort(); - } + + abstract class InvertedDocEndConsumerPerField + { + internal abstract void Finish(); + internal abstract void Abort(); + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/InvertedDocEndConsumerPerThread.cs ---------------------------------------------------------------------- diff --git a/src/core/Index/InvertedDocEndConsumerPerThread.cs b/src/core/Index/InvertedDocEndConsumerPerThread.cs index 2f4fb5c..4721566 100644 --- a/src/core/Index/InvertedDocEndConsumerPerThread.cs +++ b/src/core/Index/InvertedDocEndConsumerPerThread.cs @@ -19,12 +19,12 @@ using System; namespace Lucene.Net.Index { - - abstract class InvertedDocEndConsumerPerThread - { - internal abstract void StartDocument(); - internal abstract InvertedDocEndConsumerPerField AddField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo); - internal abstract void FinishDocument(); - internal abstract void Abort(); - } + + abstract class InvertedDocEndConsumerPerThread + { + internal abstract void StartDocument(); + internal abstract InvertedDocEndConsumerPerField AddField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo); + internal abstract void FinishDocument(); + internal abstract void Abort(); + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/KeepOnlyLastCommitDeletionPolicy.cs ---------------------------------------------------------------------- diff --git a/src/core/Index/KeepOnlyLastCommitDeletionPolicy.cs b/src/core/Index/KeepOnlyLastCommitDeletionPolicy.cs index 3775de1..7cb928b 100644 --- a/src/core/Index/KeepOnlyLastCommitDeletionPolicy.cs +++ b/src/core/Index/KeepOnlyLastCommitDeletionPolicy.cs @@ -19,33 +19,33 @@ using System.Collections.Generic; namespace Lucene.Net.Index { - - /// This implementation that - /// keeps only the most recent commit and immediately removes - /// all prior commits after a new commit is done. This is - /// the default deletion policy. - /// - - public sealed class KeepOnlyLastCommitDeletionPolicy : IndexDeletionPolicy - { - - /// Deletes all commits except the most recent one. - public void OnInit(IList commits) where T : IndexCommit - { - // Note that commits.size() should normally be 1: - OnCommit(commits); - } - - /// Deletes all commits except the most recent one. - public void OnCommit(IList commits) where T : IndexCommit - { - // Note that commits.size() should normally be 2 (if not - // called by onInit above): - int size = commits.Count; - for (int i = 0; i < size - 1; i++) - { - commits[i].Delete(); - } - } - } + + /// This implementation that + /// keeps only the most recent commit and immediately removes + /// all prior commits after a new commit is done. This is + /// the default deletion policy. 
+ /// + + public sealed class KeepOnlyLastCommitDeletionPolicy : IndexDeletionPolicy + { + + /// Deletes all commits except the most recent one. + public void OnInit(IList commits) where T : IndexCommit + { + // Note that commits.size() should normally be 1: + OnCommit(commits); + } + + /// Deletes all commits except the most recent one. + public void OnCommit(IList commits) where T : IndexCommit + { + // Note that commits.size() should normally be 2 (if not + // called by onInit above): + int size = commits.Count; + for (int i = 0; i < size - 1; i++) + { + commits[i].Delete(); + } + } + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/LogByteSizeMergePolicy.cs ---------------------------------------------------------------------- diff --git a/src/core/Index/LogByteSizeMergePolicy.cs b/src/core/Index/LogByteSizeMergePolicy.cs index 5d5c952..5f1b13d 100644 --- a/src/core/Index/LogByteSizeMergePolicy.cs +++ b/src/core/Index/LogByteSizeMergePolicy.cs @@ -19,34 +19,34 @@ using System; namespace Lucene.Net.Index { - - /// This is a that measures size of a - /// segment as the total byte size of the segment's files. - /// - public class LogByteSizeMergePolicy : LogMergePolicy - { - - /// - /// - public const double DEFAULT_MIN_MERGE_MB = 1.6; - - /// Default maximum segment size. A segment of this size - /// - /// - public static readonly long DEFAULT_MAX_MERGE_MB = long.MaxValue; - - public LogByteSizeMergePolicy(IndexWriter writer) + + /// This is a that measures size of a + /// segment as the total byte size of the segment's files. + /// + public class LogByteSizeMergePolicy : LogMergePolicy + { + + /// + /// + public const double DEFAULT_MIN_MERGE_MB = 1.6; + + /// Default maximum segment size. A segment of this size + /// + /// + public static readonly long DEFAULT_MAX_MERGE_MB = long.MaxValue; + + public LogByteSizeMergePolicy(IndexWriter writer) : base(writer) - { - minMergeSize = (long) (DEFAULT_MIN_MERGE_MB * 1024 * 1024); + { + minMergeSize = (long) (DEFAULT_MIN_MERGE_MB * 1024 * 1024); //mgarski - the line below causes an overflow in .NET, resulting in a negative number... - //maxMergeSize = (long) (DEFAULT_MAX_MERGE_MB * 1024 * 1024); + //maxMergeSize = (long) (DEFAULT_MAX_MERGE_MB * 1024 * 1024); maxMergeSize = DEFAULT_MAX_MERGE_MB; - } - protected internal override long Size(SegmentInfo info) - { - return SizeBytes(info); - } + } + protected internal override long Size(SegmentInfo info) + { + return SizeBytes(info); + } protected override void Dispose(bool disposing) { @@ -54,46 +54,46 @@ namespace Lucene.Net.Index } - ///

Gets or sets the largest segment (measured by total - /// byte size of the segment's files, in MB) that may be - /// merged with other segments. Small values (e.g., less - /// than 50 MB) are best for interactive indexing, as this - /// limits the length of pauses while indexing to a few - /// seconds. Larger values are best for batched indexing - /// and speedier searches. - /// - /// Note that is also - /// used to check whether a segment is too large for - /// merging (it's either or). - ///
- public virtual double MaxMergeMB - { - get { return maxMergeSize/1024d/1024d; } - set - { - //mgarski: java gracefully overflows to Int64.MaxValue, .NET to MinValue... - maxMergeSize = (long) (value*1024*1024); - if (maxMergeSize < 0) - { - maxMergeSize = DEFAULT_MAX_MERGE_MB; - } - } - } + ///

Gets or sets the largest segment (measured by total + /// byte size of the segment's files, in MB) that may be + /// merged with other segments. Small values (e.g., less + /// than 50 MB) are best for interactive indexing, as this + /// limits the length of pauses while indexing to a few + /// seconds. Larger values are best for batched indexing + /// and speedier searches. + /// + /// Note that is also + /// used to check whether a segment is too large for + /// merging (it's either or). + ///
+ public virtual double MaxMergeMB + { + get { return maxMergeSize/1024d/1024d; } + set + { + //mgarski: java gracefully overflows to Int64.MaxValue, .NET to MinValue... + maxMergeSize = (long) (value*1024*1024); + if (maxMergeSize < 0) + { + maxMergeSize = DEFAULT_MAX_MERGE_MB; + } + } + } - /// Gets or sets the minimum size for the lowest level segments. - /// Any segments below this size are considered to be on - /// the same level (even if they vary drastically in size) - /// and will be merged whenever there are mergeFactor of - /// them. This effectively truncates the "long tail" of - /// small segments that would otherwise be created into a - /// single level. If you set this too large, it could - /// greatly increase the merging cost during indexing (if - /// you flush many small segments). - /// - public virtual double MinMergeMB - { - get { return ((double) minMergeSize)/1024/1024; } - set { minMergeSize = (long) (value*1024*1024); } - } - } + /// Gets or sets the minimum size for the lowest level segments. + /// Any segments below this size are considered to be on + /// the same level (even if they vary drastically in size) + /// and will be merged whenever there are mergeFactor of + /// them. This effectively truncates the "long tail" of + /// small segments that would otherwise be created into a + /// single level. If you set this too large, it could + /// greatly increase the merging cost during indexing (if + /// you flush many small segments). + /// + public virtual double MinMergeMB + { + get { return ((double) minMergeSize)/1024/1024; } + set { minMergeSize = (long) (value*1024*1024); } + } + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/LogDocMergePolicy.cs ---------------------------------------------------------------------- diff --git a/src/core/Index/LogDocMergePolicy.cs b/src/core/Index/LogDocMergePolicy.cs index 55ee407..610b890 100644 --- a/src/core/Index/LogDocMergePolicy.cs +++ b/src/core/Index/LogDocMergePolicy.cs @@ -19,51 +19,51 @@ using System; namespace Lucene.Net.Index { - - /// This is a that measures size of a - /// segment as the number of documents (not taking deletions - /// into account). - /// - - public class LogDocMergePolicy : LogMergePolicy - { - - /// - /// - public const int DEFAULT_MIN_MERGE_DOCS = 1000; - - public LogDocMergePolicy(IndexWriter writer):base(writer) - { - minMergeSize = DEFAULT_MIN_MERGE_DOCS; - - // maxMergeSize is never used by LogDocMergePolicy; set - // it to Long.MAX_VALUE to disable it - maxMergeSize = System.Int64.MaxValue; - } - protected internal override long Size(SegmentInfo info) - { - return SizeDocs(info); - } + + /// This is a that measures size of a + /// segment as the number of documents (not taking deletions + /// into account). + /// + + public class LogDocMergePolicy : LogMergePolicy + { + + /// + /// + public const int DEFAULT_MIN_MERGE_DOCS = 1000; + + public LogDocMergePolicy(IndexWriter writer):base(writer) + { + minMergeSize = DEFAULT_MIN_MERGE_DOCS; + + // maxMergeSize is never used by LogDocMergePolicy; set + // it to Long.MAX_VALUE to disable it + maxMergeSize = System.Int64.MaxValue; + } + protected internal override long Size(SegmentInfo info) + { + return SizeDocs(info); + } - protected override void Dispose(bool disposing) + protected override void Dispose(bool disposing) { // Do nothing. } - /// Gets or sets the minimum size for the lowest level segments. 
- /// Any segments below this size are considered to be on - /// the same level (even if they vary drastically in size) - /// and will be merged whenever there are mergeFactor of - /// them. This effectively truncates the "long tail" of - /// small segments that would otherwise be created into a - /// single level. If you set this too large, it could - /// greatly increase the merging cost during indexing (if - /// you flush many small segments). - /// - public virtual int MinMergeDocs - { - get { return (int) minMergeSize; } - set { minMergeSize = value; } - } - } + /// Gets or sets the minimum size for the lowest level segments. + /// Any segments below this size are considered to be on + /// the same level (even if they vary drastically in size) + /// and will be merged whenever there are mergeFactor of + /// them. This effectively truncates the "long tail" of + /// small segments that would otherwise be created into a + /// single level. If you set this too large, it could + /// greatly increase the merging cost during indexing (if + /// you flush many small segments). + /// + public virtual int MinMergeDocs + { + get { return (int) minMergeSize; } + set { minMergeSize = value; } + } + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/LogMergePolicy.cs ---------------------------------------------------------------------- diff --git a/src/core/Index/LogMergePolicy.cs b/src/core/Index/LogMergePolicy.cs index c087835..5c65c92 100644 --- a/src/core/Index/LogMergePolicy.cs +++ b/src/core/Index/LogMergePolicy.cs @@ -20,508 +20,508 @@ using System.Collections.Generic; namespace Lucene.Net.Index { - - ///

This class implements a that tries - /// to merge segments into levels of exponentially - /// increasing size, where each level has fewer segments than - /// the value of the merge factor. Whenever extra segments - /// (beyond the merge factor upper bound) are encountered, - /// all segments within the level are merged. You can get or - /// set the merge factor using and - /// respectively. - /// - /// This class is abstract and requires a subclass to - /// define the method which specifies how a - /// segment's size is determined. - /// is one subclass that measures size by document count in - /// the segment. is another - /// subclass that measures size as the total byte size of the - /// file(s) for the segment. - ///
- - public abstract class LogMergePolicy : MergePolicy - { - - /// Defines the allowed range of log(size) for each - /// level. A level is computed by taking the max segment - /// log size, minus LEVEL_LOG_SPAN, and finding all - /// segments falling within that range. - /// - public const double LEVEL_LOG_SPAN = 0.75; - - /// Default merge factor, which is how many segments are - /// merged at a time - /// - public const int DEFAULT_MERGE_FACTOR = 10; - - /// Default maximum segment size. A segment of this size - /// - /// - public static readonly int DEFAULT_MAX_MERGE_DOCS = System.Int32.MaxValue; + + ///

This class implements a that tries + /// to merge segments into levels of exponentially + /// increasing size, where each level has fewer segments than + /// the value of the merge factor. Whenever extra segments + /// (beyond the merge factor upper bound) are encountered, + /// all segments within the level are merged. You can get or + /// set the merge factor using and + /// respectively. + /// + /// This class is abstract and requires a subclass to + /// define the method which specifies how a + /// segment's size is determined. + /// is one subclass that measures size by document count in + /// the segment. is another + /// subclass that measures size as the total byte size of the + /// file(s) for the segment. + ///
+ + public abstract class LogMergePolicy : MergePolicy + { + + /// Defines the allowed range of log(size) for each + /// level. A level is computed by taking the max segment + /// log size, minus LEVEL_LOG_SPAN, and finding all + /// segments falling within that range. + /// + public const double LEVEL_LOG_SPAN = 0.75; + + /// Default merge factor, which is how many segments are + /// merged at a time + /// + public const int DEFAULT_MERGE_FACTOR = 10; + + /// Default maximum segment size. A segment of this size + /// + /// + public static readonly int DEFAULT_MAX_MERGE_DOCS = System.Int32.MaxValue; /// Default noCFSRatio. If a merge's size is >= 10% of /// the index, then we disable compound file for it. /// See /// public static double DEFAULT_NO_CFS_RATIO = 0.1; - - private int mergeFactor = DEFAULT_MERGE_FACTOR; - - internal long minMergeSize; - internal long maxMergeSize; - internal int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; + + private int mergeFactor = DEFAULT_MERGE_FACTOR; + + internal long minMergeSize; + internal long maxMergeSize; + internal int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; protected double internalNoCFSRatio = DEFAULT_NO_CFS_RATIO; - - /* TODO 3.0: change this default to true */ - protected internal bool internalCalibrateSizeByDeletes = true; - - private bool useCompoundFile = true; - private bool useCompoundDocStore = true; + + /* TODO 3.0: change this default to true */ + protected internal bool internalCalibrateSizeByDeletes = true; + + private bool useCompoundFile = true; + private bool useCompoundDocStore = true; - protected LogMergePolicy(IndexWriter writer):base(writer) - { - } - - protected internal virtual bool Verbose() - { - return writer != null && writer.Verbose; - } + protected LogMergePolicy(IndexWriter writer):base(writer) + { + } + + protected internal virtual bool Verbose() + { + return writer != null && writer.Verbose; + } - public double NoCFSRatio - { - get { return internalNoCFSRatio; } - set - { - if (value < 0.0 || value > 1.0) - { - throw new ArgumentException("noCFSRatio must be 0.0 to 1.0 inclusive; got " + value); - } - this.internalNoCFSRatio = value; - } - } + public double NoCFSRatio + { + get { return internalNoCFSRatio; } + set + { + if (value < 0.0 || value > 1.0) + { + throw new ArgumentException("noCFSRatio must be 0.0 to 1.0 inclusive; got " + value); + } + this.internalNoCFSRatio = value; + } + } - /* If a merged segment will be more than this percentage + /* If a merged segment will be more than this percentage * of the total size of the index, leave the segment as * non-compound file even if compound file is enabled. * Set to 1.0 to always use CFS regardless of merge * size. */ - private void Message(System.String message) - { - if (Verbose()) - writer.Message("LMP: " + message); - } + private void Message(System.String message) + { + if (Verbose()) + writer.Message("LMP: " + message); + } - /// Gets or sets how often segment indices are merged by - /// addDocument(). With smaller values, less RAM is used - /// while indexing, and searches on unoptimized indices are - /// faster, but indexing speed is slower. With larger - /// values, more RAM is used during indexing, and while - /// searches on unoptimized indices are slower, indexing is - /// faster. Thus larger values (> 10) are best for batch - /// index creation, and smaller values (< 10) for indices - /// that are interactively maintained. 
- /// - public virtual int MergeFactor - { - get { return mergeFactor; } - set - { - if (value < 2) - throw new System.ArgumentException("mergeFactor cannot be less than 2"); - this.mergeFactor = value; - } - } + /// Gets or sets how often segment indices are merged by + /// addDocument(). With smaller values, less RAM is used + /// while indexing, and searches on unoptimized indices are + /// faster, but indexing speed is slower. With larger + /// values, more RAM is used during indexing, and while + /// searches on unoptimized indices are slower, indexing is + /// faster. Thus larger values (> 10) are best for batch + /// index creation, and smaller values (< 10) for indices + /// that are interactively maintained. + /// + public virtual int MergeFactor + { + get { return mergeFactor; } + set + { + if (value < 2) + throw new System.ArgumentException("mergeFactor cannot be less than 2"); + this.mergeFactor = value; + } + } - public override bool UseCompoundFile(SegmentInfos infos, SegmentInfo info) - { - return useCompoundFile; - } - - /// Gets or sets whether compound file format should be used for - /// newly flushed and newly merged segments. - /// - public virtual void SetUseCompoundFile(bool useCompoundFile) - { - this.useCompoundFile = useCompoundFile; - } + public override bool UseCompoundFile(SegmentInfos infos, SegmentInfo info) + { + return useCompoundFile; + } + + /// Gets or sets whether compound file format should be used for + /// newly flushed and newly merged segments. + /// + public virtual void SetUseCompoundFile(bool useCompoundFile) + { + this.useCompoundFile = useCompoundFile; + } [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] public virtual bool GetUseCompoundFile() - { - return useCompoundFile; - } - - // Javadoc inherited - public override bool UseCompoundDocStore(SegmentInfos infos) - { - return useCompoundDocStore; - } - - /// Sets whether compound file format should be used for - /// newly flushed and newly merged doc store - /// segment files (term vectors and stored fields). - /// - public virtual void SetUseCompoundDocStore(bool useCompoundDocStore) - { - this.useCompoundDocStore = useCompoundDocStore; - } - - /// Returns true if newly flushed and newly merge doc - /// store segment files (term vectors and stored fields) - /// + { + return useCompoundFile; + } + + // Javadoc inherited + public override bool UseCompoundDocStore(SegmentInfos infos) + { + return useCompoundDocStore; + } + + /// Sets whether compound file format should be used for + /// newly flushed and newly merged doc store + /// segment files (term vectors and stored fields). + /// + public virtual void SetUseCompoundDocStore(bool useCompoundDocStore) + { + this.useCompoundDocStore = useCompoundDocStore; + } + + /// Returns true if newly flushed and newly merge doc + /// store segment files (term vectors and stored fields) + /// /// - /// + /// [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] public virtual bool GetUseCompoundDocStore() - { - return useCompoundDocStore; - } + { + return useCompoundDocStore; + } - /// Gets or sets whether the segment size should be calibrated by - /// the number of deletes when choosing segments for merge. 
- /// - public virtual bool CalibrateSizeByDeletes - { - set { this.internalCalibrateSizeByDeletes = value; } - get { return internalCalibrateSizeByDeletes; } - } + /// Gets or sets whether the segment size should be calibrated by + /// the number of deletes when choosing segments for merge. + /// + public virtual bool CalibrateSizeByDeletes + { + set { this.internalCalibrateSizeByDeletes = value; } + get { return internalCalibrateSizeByDeletes; } + } - abstract protected internal long Size(SegmentInfo info); - - protected internal virtual long SizeDocs(SegmentInfo info) - { - if (internalCalibrateSizeByDeletes) - { - int delCount = writer.NumDeletedDocs(info); - return (info.docCount - (long) delCount); - } - else - { - return info.docCount; - } - } - - protected internal virtual long SizeBytes(SegmentInfo info) - { - long byteSize = info.SizeInBytes(); - if (internalCalibrateSizeByDeletes) - { - int delCount = writer.NumDeletedDocs(info); - float delRatio = (info.docCount <= 0?0.0f:((float) delCount / (float) info.docCount)); - return (info.docCount <= 0?byteSize:(long) (byteSize * (1.0f - delRatio))); - } - else - { - return byteSize; - } - } - - private bool IsOptimized(SegmentInfos infos, int maxNumSegments, ISet segmentsToOptimize) - { - int numSegments = infos.Count; - int numToOptimize = 0; - SegmentInfo optimizeInfo = null; - for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++) - { - SegmentInfo info = infos.Info(i); - if (segmentsToOptimize.Contains(info)) - { - numToOptimize++; - optimizeInfo = info; - } - } - - return numToOptimize <= maxNumSegments && (numToOptimize != 1 || IsOptimized(optimizeInfo)); - } - - /// Returns true if this single info is optimized (has no - /// pending norms or deletes, is in the same dir as the - /// writer, and matches the current compound file setting - /// - private bool IsOptimized(SegmentInfo info) - { - bool hasDeletions = writer.NumDeletedDocs(info) > 0; - return !hasDeletions && !info.HasSeparateNorms() && info.dir == writer.Directory && + abstract protected internal long Size(SegmentInfo info); + + protected internal virtual long SizeDocs(SegmentInfo info) + { + if (internalCalibrateSizeByDeletes) + { + int delCount = writer.NumDeletedDocs(info); + return (info.docCount - (long) delCount); + } + else + { + return info.docCount; + } + } + + protected internal virtual long SizeBytes(SegmentInfo info) + { + long byteSize = info.SizeInBytes(); + if (internalCalibrateSizeByDeletes) + { + int delCount = writer.NumDeletedDocs(info); + float delRatio = (info.docCount <= 0?0.0f:((float) delCount / (float) info.docCount)); + return (info.docCount <= 0?byteSize:(long) (byteSize * (1.0f - delRatio))); + } + else + { + return byteSize; + } + } + + private bool IsOptimized(SegmentInfos infos, int maxNumSegments, ISet segmentsToOptimize) + { + int numSegments = infos.Count; + int numToOptimize = 0; + SegmentInfo optimizeInfo = null; + for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++) + { + SegmentInfo info = infos.Info(i); + if (segmentsToOptimize.Contains(info)) + { + numToOptimize++; + optimizeInfo = info; + } + } + + return numToOptimize <= maxNumSegments && (numToOptimize != 1 || IsOptimized(optimizeInfo)); + } + + /// Returns true if this single info is optimized (has no + /// pending norms or deletes, is in the same dir as the + /// writer, and matches the current compound file setting + /// + private bool IsOptimized(SegmentInfo info) + { + bool hasDeletions = writer.NumDeletedDocs(info) > 0; + return 
!hasDeletions && !info.HasSeparateNorms() && info.dir == writer.Directory && (info.GetUseCompoundFile() == useCompoundFile || internalNoCFSRatio < 1.0); - } - - /// Returns the merges necessary to optimize the index. - /// This merge policy defines "optimized" to mean only one - /// segment in the index, where that segment has no - /// deletions pending nor separate norms, and it is in - /// compound file format if the current useCompoundFile - /// setting is true. This method returns multiple merges - /// (mergeFactor at a time) so the - /// in use may make use of concurrency. - /// - public override MergeSpecification FindMergesForOptimize(SegmentInfos infos, int maxNumSegments, ISet segmentsToOptimize) - { - MergeSpecification spec; - - System.Diagnostics.Debug.Assert(maxNumSegments > 0); - - if (!IsOptimized(infos, maxNumSegments, segmentsToOptimize)) - { - - // Find the newest (rightmost) segment that needs to - // be optimized (other segments may have been flushed - // since optimize started): - int last = infos.Count; - while (last > 0) - { - SegmentInfo info = infos.Info(--last); - if (segmentsToOptimize.Contains(info)) - { - last++; - break; - } - } - - if (last > 0) - { - - spec = new MergeSpecification(); - - // First, enroll all "full" merges (size - // mergeFactor) to potentially be run concurrently: - while (last - maxNumSegments + 1 >= mergeFactor) - { + } + + /// Returns the merges necessary to optimize the index. + /// This merge policy defines "optimized" to mean only one + /// segment in the index, where that segment has no + /// deletions pending nor separate norms, and it is in + /// compound file format if the current useCompoundFile + /// setting is true. This method returns multiple merges + /// (mergeFactor at a time) so the + /// in use may make use of concurrency. 
+ /// + public override MergeSpecification FindMergesForOptimize(SegmentInfos infos, int maxNumSegments, ISet segmentsToOptimize) + { + MergeSpecification spec; + + System.Diagnostics.Debug.Assert(maxNumSegments > 0); + + if (!IsOptimized(infos, maxNumSegments, segmentsToOptimize)) + { + + // Find the newest (rightmost) segment that needs to + // be optimized (other segments may have been flushed + // since optimize started): + int last = infos.Count; + while (last > 0) + { + SegmentInfo info = infos.Info(--last); + if (segmentsToOptimize.Contains(info)) + { + last++; + break; + } + } + + if (last > 0) + { + + spec = new MergeSpecification(); + + // First, enroll all "full" merges (size + // mergeFactor) to potentially be run concurrently: + while (last - maxNumSegments + 1 >= mergeFactor) + { spec.Add(MakeOneMerge(infos, infos.Range(last - mergeFactor, last))); - last -= mergeFactor; - } - - // Only if there are no full merges pending do we - // add a final partial (< mergeFactor segments) merge: - if (0 == spec.merges.Count) - { - if (maxNumSegments == 1) - { - - // Since we must optimize down to 1 segment, the - // choice is simple: - if (last > 1 || !IsOptimized(infos.Info(0))) + last -= mergeFactor; + } + + // Only if there are no full merges pending do we + // add a final partial (< mergeFactor segments) merge: + if (0 == spec.merges.Count) + { + if (maxNumSegments == 1) + { + + // Since we must optimize down to 1 segment, the + // choice is simple: + if (last > 1 || !IsOptimized(infos.Info(0))) spec.Add(MakeOneMerge(infos, infos.Range(0, last))); - } - else if (last > maxNumSegments) - { - - // Take care to pick a partial merge that is - // least cost, but does not make the index too - // lopsided. If we always just picked the - // partial tail then we could produce a highly - // lopsided index over time: - - // We must merge this many segments to leave - // maxNumSegments in the index (from when - // optimize was first kicked off): - int finalMergeSize = last - maxNumSegments + 1; - - // Consider all possible starting points: - long bestSize = 0; - int bestStart = 0; - - for (int i = 0; i < last - finalMergeSize + 1; i++) - { - long sumSize = 0; - for (int j = 0; j < finalMergeSize; j++) - sumSize += Size(infos.Info(j + i)); - if (i == 0 || (sumSize < 2 * Size(infos.Info(i - 1)) && sumSize < bestSize)) - { - bestStart = i; - bestSize = sumSize; - } - } + } + else if (last > maxNumSegments) + { + + // Take care to pick a partial merge that is + // least cost, but does not make the index too + // lopsided. If we always just picked the + // partial tail then we could produce a highly + // lopsided index over time: + + // We must merge this many segments to leave + // maxNumSegments in the index (from when + // optimize was first kicked off): + int finalMergeSize = last - maxNumSegments + 1; + + // Consider all possible starting points: + long bestSize = 0; + int bestStart = 0; + + for (int i = 0; i < last - finalMergeSize + 1; i++) + { + long sumSize = 0; + for (int j = 0; j < finalMergeSize; j++) + sumSize += Size(infos.Info(j + i)); + if (i == 0 || (sumSize < 2 * Size(infos.Info(i - 1)) && sumSize < bestSize)) + { + bestStart = i; + bestSize = sumSize; + } + } spec.Add(MakeOneMerge(infos, infos.Range(bestStart, bestStart + finalMergeSize))); - } - } - } - else - spec = null; - } - else - spec = null; - - return spec; - } - - /// Finds merges necessary to expunge all deletes from the - /// index. We simply merge adjacent segments that have - /// deletes, up to mergeFactor at a time. 
- /// - public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos) - { - int numSegments = segmentInfos.Count; - - if (Verbose()) - Message("findMergesToExpungeDeletes: " + numSegments + " segments"); - - MergeSpecification spec = new MergeSpecification(); - int firstSegmentWithDeletions = - 1; - for (int i = 0; i < numSegments; i++) - { - SegmentInfo info = segmentInfos.Info(i); - int delCount = writer.NumDeletedDocs(info); - if (delCount > 0) - { - if (Verbose()) - Message(" segment " + info.name + " has deletions"); - if (firstSegmentWithDeletions == - 1) - firstSegmentWithDeletions = i; - else if (i - firstSegmentWithDeletions == mergeFactor) - { - // We've seen mergeFactor segments in a row with - // deletions, so force a merge now: - if (Verbose()) - Message(" add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive"); + } + } + } + else + spec = null; + } + else + spec = null; + + return spec; + } + + /// Finds merges necessary to expunge all deletes from the + /// index. We simply merge adjacent segments that have + /// deletes, up to mergeFactor at a time. + /// + public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos) + { + int numSegments = segmentInfos.Count; + + if (Verbose()) + Message("findMergesToExpungeDeletes: " + numSegments + " segments"); + + MergeSpecification spec = new MergeSpecification(); + int firstSegmentWithDeletions = - 1; + for (int i = 0; i < numSegments; i++) + { + SegmentInfo info = segmentInfos.Info(i); + int delCount = writer.NumDeletedDocs(info); + if (delCount > 0) + { + if (Verbose()) + Message(" segment " + info.name + " has deletions"); + if (firstSegmentWithDeletions == - 1) + firstSegmentWithDeletions = i; + else if (i - firstSegmentWithDeletions == mergeFactor) + { + // We've seen mergeFactor segments in a row with + // deletions, so force a merge now: + if (Verbose()) + Message(" add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive"); spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i))); - firstSegmentWithDeletions = i; - } - } - else if (firstSegmentWithDeletions != - 1) - { - // End of a sequence of segments with deletions, so, - // merge those past segments even if it's fewer than - // mergeFactor segments - if (Verbose()) - Message(" add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive"); + firstSegmentWithDeletions = i; + } + } + else if (firstSegmentWithDeletions != - 1) + { + // End of a sequence of segments with deletions, so, + // merge those past segments even if it's fewer than + // mergeFactor segments + if (Verbose()) + Message(" add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive"); spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i))); - firstSegmentWithDeletions = - 1; - } - } - - if (firstSegmentWithDeletions != - 1) - { - if (Verbose()) - Message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments - 1) + " inclusive"); + firstSegmentWithDeletions = - 1; + } + } + + if (firstSegmentWithDeletions != - 1) + { + if (Verbose()) + Message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments - 1) + " inclusive"); spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, numSegments))); - } - - return spec; - } - - /// Checks if any merges are now necessary and returns a - /// if so. A merge - /// is necessary when there are more than - /// segments at a given level. 
When - /// multiple levels have too many segments, this method - /// will return multiple merges, allowing the - /// to use concurrency. - /// - public override MergeSpecification FindMerges(SegmentInfos infos) - { - - int numSegments = infos.Count; - if (Verbose()) - Message("findMerges: " + numSegments + " segments"); - - // Compute levels, which is just log (base mergeFactor) - // of the size of each segment - float[] levels = new float[numSegments]; - float norm = (float) System.Math.Log(mergeFactor); - - for (int i = 0; i < numSegments; i++) - { - SegmentInfo info = infos.Info(i); - long size = Size(info); - - // Floor tiny segments - if (size < 1) - size = 1; - levels[i] = (float) System.Math.Log(size) / norm; - } - - float levelFloor; - if (minMergeSize <= 0) - levelFloor = (float) 0.0; - else - { - levelFloor = (float) (System.Math.Log(minMergeSize) / norm); - } - - // Now, we quantize the log values into levels. The - // first level is any segment whose log size is within - // LEVEL_LOG_SPAN of the max size, or, who has such as - // segment "to the right". Then, we find the max of all - // other segments and use that to define the next level - // segment, etc. - - MergeSpecification spec = null; - - int start = 0; - while (start < numSegments) - { - - // Find max level of all segments not already - // quantized. - float maxLevel = levels[start]; - for (int i = 1 + start; i < numSegments; i++) - { - float level = levels[i]; - if (level > maxLevel) - maxLevel = level; - } - - // Now search backwards for the rightmost segment that - // falls into this level: - float levelBottom; - if (maxLevel < levelFloor) - // All remaining segments fall into the min level - levelBottom = - 1.0F; - else - { - levelBottom = (float) (maxLevel - LEVEL_LOG_SPAN); - - // Force a boundary at the level floor - if (levelBottom < levelFloor && maxLevel >= levelFloor) - levelBottom = levelFloor; - } - - int upto = numSegments - 1; - while (upto >= start) - { - if (levels[upto] >= levelBottom) - { - break; - } - upto--; - } - if (Verbose()) - Message(" level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments"); - - // Finally, record all merges that are viable at this level: - int end = start + mergeFactor; - while (end <= 1 + upto) - { - bool anyTooLarge = false; - for (int i = start; i < end; i++) - { - SegmentInfo info = infos.Info(i); - anyTooLarge |= (Size(info) >= maxMergeSize || SizeDocs(info) >= maxMergeDocs); - } - - if (!anyTooLarge) - { - if (spec == null) - spec = new MergeSpecification(); - if (Verbose()) - Message(" " + start + " to " + end + ": add this merge"); + } + + return spec; + } + + /// Checks if any merges are now necessary and returns a + /// if so. A merge + /// is necessary when there are more than + /// segments at a given level. When + /// multiple levels have too many segments, this method + /// will return multiple merges, allowing the + /// to use concurrency. 
+ /// + public override MergeSpecification FindMerges(SegmentInfos infos) + { + + int numSegments = infos.Count; + if (Verbose()) + Message("findMerges: " + numSegments + " segments"); + + // Compute levels, which is just log (base mergeFactor) + // of the size of each segment + float[] levels = new float[numSegments]; + float norm = (float) System.Math.Log(mergeFactor); + + for (int i = 0; i < numSegments; i++) + { + SegmentInfo info = infos.Info(i); + long size = Size(info); + + // Floor tiny segments + if (size < 1) + size = 1; + levels[i] = (float) System.Math.Log(size) / norm; + } + + float levelFloor; + if (minMergeSize <= 0) + levelFloor = (float) 0.0; + else + { + levelFloor = (float) (System.Math.Log(minMergeSize) / norm); + } + + // Now, we quantize the log values into levels. The + // first level is any segment whose log size is within + // LEVEL_LOG_SPAN of the max size, or, who has such as + // segment "to the right". Then, we find the max of all + // other segments and use that to define the next level + // segment, etc. + + MergeSpecification spec = null; + + int start = 0; + while (start < numSegments) + { + + // Find max level of all segments not already + // quantized. + float maxLevel = levels[start]; + for (int i = 1 + start; i < numSegments; i++) + { + float level = levels[i]; + if (level > maxLevel) + maxLevel = level; + } + + // Now search backwards for the rightmost segment that + // falls into this level: + float levelBottom; + if (maxLevel < levelFloor) + // All remaining segments fall into the min level + levelBottom = - 1.0F; + else + { + levelBottom = (float) (maxLevel - LEVEL_LOG_SPAN); + + // Force a boundary at the level floor + if (levelBottom < levelFloor && maxLevel >= levelFloor) + levelBottom = levelFloor; + } + + int upto = numSegments - 1; + while (upto >= start) + { + if (levels[upto] >= levelBottom) + { + break; + } + upto--; + } + if (Verbose()) + Message(" level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments"); + + // Finally, record all merges that are viable at this level: + int end = start + mergeFactor; + while (end <= 1 + upto) + { + bool anyTooLarge = false; + for (int i = start; i < end; i++) + { + SegmentInfo info = infos.Info(i); + anyTooLarge |= (Size(info) >= maxMergeSize || SizeDocs(info) >= maxMergeDocs); + } + + if (!anyTooLarge) + { + if (spec == null) + spec = new MergeSpecification(); + if (Verbose()) + Message(" " + start + " to " + end + ": add this merge"); spec.Add(MakeOneMerge(infos, infos.Range(start, end))); - } - else if (Verbose()) - Message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping"); - - start = end; - end = start + mergeFactor; - } - - start = 1 + upto; - } - - return spec; - } + } + else if (Verbose()) + Message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping"); + + start = end; + end = start + mergeFactor; + } + + start = 1 + upto; + } + + return spec; + } protected OneMerge MakeOneMerge(SegmentInfos infos, SegmentInfos infosToMerge) { @@ -553,28 +553,28 @@ namespace Lucene.Net.Index return new OneMerge(infosToMerge, doCFS); } - /// - /// Gets or sets the largest segment (measured by document - /// count) that may be merged with other segments. - ///

Determines the largest segment (measured by - /// document count) that may be merged with other segments. - /// Small values (e.g., less than 10,000) are best for - /// interactive indexing, as this limits the length of - /// pauses while indexing to a few seconds. Larger values - /// are best for batched indexing and speedier - /// searches. - /// - /// The default value is . - /// - /// The default merge policy () - /// also allows you to set this - /// limit by net size (in MB) of the segment, using - /// . - /// - ///
- public virtual int MaxMergeDocs - { - set { this.maxMergeDocs = value; } - get { return maxMergeDocs; } - } - } + /// + /// Gets or sets the largest segment (measured by document + /// count) that may be merged with other segments. + ///

Determines the largest segment (measured by + /// document count) that may be merged with other segments. + /// Small values (e.g., less than 10,000) are best for + /// interactive indexing, as this limits the length of + /// pauses while indexing to a few seconds. Larger values + /// are best for batched indexing and speedier + /// searches. + /// + /// The default value is . + /// + /// The default merge policy () + /// also allows you to set this + /// limit by net size (in MB) of the segment, using + /// . + /// + ///
+ public virtual int MaxMergeDocs + { + set { this.maxMergeDocs = value; } + get { return maxMergeDocs; } + } + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/MergeDocIDRemapper.cs ---------------------------------------------------------------------- diff --git a/src/core/Index/MergeDocIDRemapper.cs b/src/core/Index/MergeDocIDRemapper.cs index 2771b53..5c06721 100644 --- a/src/core/Index/MergeDocIDRemapper.cs +++ b/src/core/Index/MergeDocIDRemapper.cs @@ -20,108 +20,108 @@ using Lucene.Net.Support; namespace Lucene.Net.Index { - - /// Remaps docIDs after a merge has completed, where the - /// merged segments had at least one deletion. This is used - /// to renumber the buffered deletes in IndexWriter when a - /// merge of segments with deletions commits. - /// - - sealed class MergeDocIDRemapper - { - internal int[] starts; // used for binary search of mapped docID - internal int[] newStarts; // starts, minus the deletes - internal int[][] docMaps; // maps docIDs in the merged set - internal int minDocID; // minimum docID that needs renumbering - internal int maxDocID; // 1+ the max docID that needs renumbering - internal int docShift; // total # deleted docs that were compacted by this merge - - public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount) - { - this.docMaps = docMaps; - SegmentInfo firstSegment = merge.segments.Info(0); - int i = 0; - while (true) - { - SegmentInfo info = infos.Info(i); - if (info.Equals(firstSegment)) - break; - minDocID += info.docCount; - i++; - } - - int numDocs = 0; - for (int j = 0; j < docMaps.Length; i++, j++) - { - numDocs += infos.Info(i).docCount; - System.Diagnostics.Debug.Assert(infos.Info(i).Equals(merge.segments.Info(j))); - } - maxDocID = minDocID + numDocs; - - starts = new int[docMaps.Length]; - newStarts = new int[docMaps.Length]; - - starts[0] = minDocID; - newStarts[0] = minDocID; - for (i = 1; i < docMaps.Length; i++) - { - int lastDocCount = merge.segments.Info(i - 1).docCount; - starts[i] = starts[i - 1] + lastDocCount; - newStarts[i] = newStarts[i - 1] + lastDocCount - delCounts[i - 1]; - } - docShift = numDocs - mergedDocCount; - - // There are rare cases when docShift is 0. It happens - // if you try to delete a docID that's out of bounds, - // because the SegmentReader still allocates deletedDocs - // and pretends it has deletions ... 
so we can't make - // this assert here - // assert docShift > 0; - - // Make sure it all adds up: - System.Diagnostics.Debug.Assert(docShift == maxDocID -(newStarts [docMaps.Length - 1] + merge.segments.Info(docMaps.Length - 1).docCount - delCounts [docMaps.Length - 1])); - } - - public int Remap(int oldDocID) - { - if (oldDocID < minDocID) - // Unaffected by merge - return oldDocID; - else if (oldDocID >= maxDocID) - // This doc was "after" the merge, so simple shift - return oldDocID - docShift; - else - { - // Binary search to locate this document & find its new docID - int lo = 0; // search starts array - int hi = docMaps.Length - 1; // for first element less - - while (hi >= lo) - { - int mid = Number.URShift((lo + hi), 1); - int midValue = starts[mid]; - if (oldDocID < midValue) - hi = mid - 1; - else if (oldDocID > midValue) - lo = mid + 1; - else - { - // found a match - while (mid + 1 < docMaps.Length && starts[mid + 1] == midValue) - { - mid++; // scan to last match - } - if (docMaps[mid] != null) - return newStarts[mid] + docMaps[mid][oldDocID - starts[mid]]; - else - return newStarts[mid] + oldDocID - starts[mid]; - } - } - if (docMaps[hi] != null) - return newStarts[hi] + docMaps[hi][oldDocID - starts[hi]]; - else - return newStarts[hi] + oldDocID - starts[hi]; - } - } - } + + /// Remaps docIDs after a merge has completed, where the + /// merged segments had at least one deletion. This is used + /// to renumber the buffered deletes in IndexWriter when a + /// merge of segments with deletions commits. + /// + + sealed class MergeDocIDRemapper + { + internal int[] starts; // used for binary search of mapped docID + internal int[] newStarts; // starts, minus the deletes + internal int[][] docMaps; // maps docIDs in the merged set + internal int minDocID; // minimum docID that needs renumbering + internal int maxDocID; // 1+ the max docID that needs renumbering + internal int docShift; // total # deleted docs that were compacted by this merge + + public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount) + { + this.docMaps = docMaps; + SegmentInfo firstSegment = merge.segments.Info(0); + int i = 0; + while (true) + { + SegmentInfo info = infos.Info(i); + if (info.Equals(firstSegment)) + break; + minDocID += info.docCount; + i++; + } + + int numDocs = 0; + for (int j = 0; j < docMaps.Length; i++, j++) + { + numDocs += infos.Info(i).docCount; + System.Diagnostics.Debug.Assert(infos.Info(i).Equals(merge.segments.Info(j))); + } + maxDocID = minDocID + numDocs; + + starts = new int[docMaps.Length]; + newStarts = new int[docMaps.Length]; + + starts[0] = minDocID; + newStarts[0] = minDocID; + for (i = 1; i < docMaps.Length; i++) + { + int lastDocCount = merge.segments.Info(i - 1).docCount; + starts[i] = starts[i - 1] + lastDocCount; + newStarts[i] = newStarts[i - 1] + lastDocCount - delCounts[i - 1]; + } + docShift = numDocs - mergedDocCount; + + // There are rare cases when docShift is 0. It happens + // if you try to delete a docID that's out of bounds, + // because the SegmentReader still allocates deletedDocs + // and pretends it has deletions ... 
so we can't make + // this assert here + // assert docShift > 0; + + // Make sure it all adds up: + System.Diagnostics.Debug.Assert(docShift == maxDocID -(newStarts [docMaps.Length - 1] + merge.segments.Info(docMaps.Length - 1).docCount - delCounts [docMaps.Length - 1])); + } + + public int Remap(int oldDocID) + { + if (oldDocID < minDocID) + // Unaffected by merge + return oldDocID; + else if (oldDocID >= maxDocID) + // This doc was "after" the merge, so simple shift + return oldDocID - docShift; + else + { + // Binary search to locate this document & find its new docID + int lo = 0; // search starts array + int hi = docMaps.Length - 1; // for first element less + + while (hi >= lo) + { + int mid = Number.URShift((lo + hi), 1); + int midValue = starts[mid]; + if (oldDocID < midValue) + hi = mid - 1; + else if (oldDocID > midValue) + lo = mid + 1; + else + { + // found a match + while (mid + 1 < docMaps.Length && starts[mid + 1] == midValue) + { + mid++; // scan to last match + } + if (docMaps[mid] != null) + return newStarts[mid] + docMaps[mid][oldDocID - starts[mid]]; + else + return newStarts[mid] + oldDocID - starts[mid]; + } + } + if (docMaps[hi] != null) + return newStarts[hi] + docMaps[hi][oldDocID - starts[hi]]; + else + return newStarts[hi] + oldDocID - starts[hi]; + } + } + } } \ No newline at end of file
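----------------------------------------------------------------------
Editor's note (not part of the commit above): the LogMergePolicy.FindMerges code in this patch buckets segments by log-scaled size before deciding what to merge. The standalone C# sketch below illustrates that level computation under the constants shown in the diff (LEVEL_LOG_SPAN = 0.75, DEFAULT_MERGE_FACTOR = 10); the class name, variable names, and sample sizes here are illustrative only and do not exist in Lucene.Net.

using System;

static class SegmentLevelSketch
{
    const double LevelLogSpan = 0.75;  // mirrors LogMergePolicy.LEVEL_LOG_SPAN
    const int MergeFactor = 10;        // mirrors LogMergePolicy.DEFAULT_MERGE_FACTOR

    static void Main()
    {
        // Hypothetical segment sizes (bytes or doc counts, depending on the Size() subclass).
        long[] sizes = { 1000000, 900000, 50000, 40000, 35000, 2000 };
        double norm = Math.Log(MergeFactor);

        // levels[i] = log(size) / log(mergeFactor); tiny segments are floored to size 1.
        var levels = new double[sizes.Length];
        for (int i = 0; i < sizes.Length; i++)
            levels[i] = Math.Log(Math.Max(sizes[i], 1)) / norm;

        // Segments within LEVEL_LOG_SPAN of the current maximum share a level; once a
        // level range holds at least MergeFactor segments, FindMerges records a merge.
        double maxLevel = levels[0];
        for (int i = 1; i < levels.Length; i++)
            if (levels[i] > maxLevel) maxLevel = levels[i];
        double levelBottom = maxLevel - LevelLogSpan;

        for (int i = 0; i < sizes.Length; i++)
            Console.WriteLine("size=" + sizes[i] + " level=" + levels[i].ToString("F2") +
                              " topLevel=" + (levels[i] >= levelBottom));
    }
}

With these sample sizes the two large segments fall in the top level and the three mid-sized ones would form the next level down, which is the grouping the patch's FindMerges then walks, largest level first, before emitting OneMerge specifications.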