Return-Path: Delivered-To: apmail-incubator-lucene-net-commits-archive@locus.apache.org Received: (qmail 54242 invoked from network); 25 Jun 2008 02:53:37 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 25 Jun 2008 02:53:37 -0000 Received: (qmail 74759 invoked by uid 500); 25 Jun 2008 02:53:38 -0000 Delivered-To: apmail-incubator-lucene-net-commits-archive@incubator.apache.org Received: (qmail 74734 invoked by uid 500); 25 Jun 2008 02:53:38 -0000 Mailing-List: contact lucene-net-commits-help@incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: lucene-net-dev@incubator.apache.org Delivered-To: mailing list lucene-net-commits@incubator.apache.org Received: (qmail 74725 invoked by uid 99); 25 Jun 2008 02:53:38 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 24 Jun 2008 19:53:38 -0700 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 25 Jun 2008 02:52:43 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 369202388ABA; Tue, 24 Jun 2008 19:52:27 -0700 (PDT) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r671404 [8/10] - /incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/ Date: Wed, 25 Jun 2008 02:52:24 -0000 To: lucene-net-commits@incubator.apache.org From: aroush@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20080625025227.369202388ABA@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentInfos.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentInfos.cs?rev=671404&r1=671403&r2=671404&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentInfos.cs (original) +++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentInfos.cs Tue Jun 24 19:52:22 2008 @@ -16,6 +16,7 @@ */ using System; + using Directory = Lucene.Net.Store.Directory; using IndexInput = Lucene.Net.Store.IndexInput; using IndexOutput = Lucene.Net.Store.IndexOutput; @@ -24,9 +25,9 @@ { [Serializable] - public sealed class SegmentInfos : System.Collections.ArrayList + sealed public class SegmentInfos : System.Collections.ArrayList { - private class AnonymousClassFindSegmentsFile:FindSegmentsFile + private class AnonymousClassFindSegmentsFile : FindSegmentsFile { private void InitBlock(SegmentInfos enclosingInstance) { @@ -41,23 +42,24 @@ } } - internal AnonymousClassFindSegmentsFile(SegmentInfos enclosingInstance, Lucene.Net.Store.Directory Param1):base(Param1) + internal AnonymousClassFindSegmentsFile(SegmentInfos enclosingInstance, Lucene.Net.Store.Directory Param1) : base(Param1) { InitBlock(enclosingInstance); } - public override System.Object DoBody(System.String segmentFileName) + protected internal override System.Object DoBody(System.String segmentFileName) { Enclosing_Instance.Read(directory, segmentFileName); return null; } } - private class AnonymousClassFindSegmentsFile1:FindSegmentsFile + + private class AnonymousClassFindSegmentsFile1 : FindSegmentsFile { - internal AnonymousClassFindSegmentsFile1(Lucene.Net.Store.Directory Param1):base(Param1) + internal 
AnonymousClassFindSegmentsFile1(Lucene.Net.Store.Directory Param1) : base(Param1) { } - public override System.Object DoBody(System.String segmentFileName) + protected internal override System.Object DoBody(System.String segmentFileName) { IndexInput input = directory.OpenInput(segmentFileName); @@ -69,8 +71,8 @@ format = input.ReadInt(); if (format < 0) { - if (format < Lucene.Net.Index.SegmentInfos.FORMAT_SINGLE_NORM_FILE) - throw new System.IO.IOException("Unknown format version: " + format); + if (format < Lucene.Net.Index.SegmentInfos.CURRENT_FORMAT) + throw new CorruptIndexException("Unknown format version: " + format); version = input.ReadLong(); // read version } } @@ -90,6 +92,7 @@ } } + /// The file format version, a negative number. /* Works since counter, the old 1st entry, is always >= 0 */ public const int FORMAT = - 1; @@ -105,18 +108,25 @@ /// public const int FORMAT_LOCKLESS = - 2; - /// This is the current file format written. It adds a - /// "hasSingleNormFile" flag into each segment info. + /// This format adds a "hasSingleNormFile" flag into each segment info. /// See LUCENE-756 /// for details. /// public const int FORMAT_SINGLE_NORM_FILE = - 3; + /// This format allows multiple segments to share a single + /// vectors and stored fields file. + /// + public const int FORMAT_SHARED_DOC_STORE = - 4; + + /* This must always point to the most recent file format. */ + private static readonly int CURRENT_FORMAT = FORMAT_SHARED_DOC_STORE; + public int counter = 0; // used to name new segments /// counts how often the index has been changed by adding or deleting docs. /// starting with the current time in milliseconds forces to create unique version numbers. /// - private long version = System.DateTime.Now.Millisecond; + private long version = (System.DateTime.Now.Ticks - 621355968000000000) / 10000; private long generation = 0; // generation of the "segments_N" for the next commit private long lastGeneration = 0; // generation of the "segments_N" file we last successfully read @@ -124,9 +134,9 @@ // there was an IOException that had interrupted a commit /// If non-null, information about loading segments_N files - /// + /// /// - private static System.IO.TextWriter infoStream; + private static System.IO.StreamWriter infoStream; public SegmentInfo Info(int i) { @@ -146,31 +156,15 @@ return - 1; } long max = - 1; - int prefixLen = IndexFileNames.SEGMENTS.Length + 1; for (int i = 0; i < files.Length; i++) { - System.String file = (new System.IO.FileInfo(files[i])).Name; + System.String file = files[i]; if (file.StartsWith(IndexFileNames.SEGMENTS) && !file.Equals(IndexFileNames.SEGMENTS_GEN)) { - if (file.Equals(IndexFileNames.SEGMENTS)) - { - // Pre lock-less commits: - if (max == - 1) - { - max = 0; - } - } - else + long gen = GenerationFromSegmentsFileName(file); + if (gen > max) { -#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE - long v = Lucene.Net.Documents.NumberTools.ToLong(file.Substring(prefixLen)); -#else - long v = System.Convert.ToInt64(file.Substring(prefixLen), 16); -#endif - if (v > max) - { - max = v; - } + max = gen; } } } @@ -188,7 +182,7 @@ System.String[] files = directory.List(); if (files == null) { - throw new System.IO.IOException("Cannot read directory " + directory); + throw new System.IO.IOException("cannot read directory " + directory + ": list() returned null"); } return GetCurrentSegmentGeneration(files); } @@ -222,6 +216,26 @@ return IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", lastGeneration); } + /// Parse the generation off the 
segments file name and + /// return it. + /// + public static long GenerationFromSegmentsFileName(System.String fileName) + { + if (fileName.Equals(IndexFileNames.SEGMENTS)) + { + return 0; + } + else if (fileName.StartsWith(IndexFileNames.SEGMENTS)) + { + return SupportClass.Number.ToInt64(fileName.Substring(1 + IndexFileNames.SEGMENTS.Length)); + } + else + { + throw new System.ArgumentException("fileName \"" + fileName + "\" is not a segments file"); + } + } + + /// Get the next segments_N filename that will be written. public System.String GetNextSegmentFileName() { @@ -246,24 +260,19 @@ /// /// -- segment file to load /// + /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error public void Read(Directory directory, System.String segmentFileName) { bool success = false; + // Clear any previous segments: + Clear(); + IndexInput input = directory.OpenInput(segmentFileName); - if (segmentFileName.Equals(IndexFileNames.SEGMENTS)) - { - generation = 0; - } - else - { -#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE - generation = Lucene.Net.Documents.NumberTools.ToLong(segmentFileName.Substring(1 + IndexFileNames.SEGMENTS.Length)); -#else - generation = System.Convert.ToInt64(segmentFileName.Substring(1 + IndexFileNames.SEGMENTS.Length), 16); -#endif - } + generation = GenerationFromSegmentsFileName(segmentFileName); + lastGeneration = generation; try @@ -273,8 +282,8 @@ { // file contains explicit format info // check that it is a format we can understand - if (format < FORMAT_SINGLE_NORM_FILE) - throw new System.IO.IOException("Unknown format version: " + format); + if (format < CURRENT_FORMAT) + throw new CorruptIndexException("Unknown format version: " + format); version = input.ReadLong(); // read version counter = input.ReadInt(); // read counter } @@ -294,7 +303,7 @@ { // in old format the version number may be at the end of the file if (input.GetFilePointer() >= input.Length()) - version = System.DateTime.Now.Millisecond; + version = (System.DateTime.Now.Ticks - 621355968000000000) / 10000; // old file format without version number else version = input.ReadLong(); // read version @@ -312,15 +321,18 @@ } } } + /// This version of read uses the retry logic (for lock-less /// commits) to find the right segments file to load. /// + /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error public void Read(Directory directory) { generation = lastGeneration = - 1; - new AnonymousClassFindSegmentsFile(this, directory).run(); + new AnonymousClassFindSegmentsFile(this, directory).Run(); } public void Write(Directory directory) @@ -340,9 +352,11 @@ IndexOutput output = directory.CreateOutput(segmentFileName); + bool success = false; + try { - output.WriteInt(FORMAT_SINGLE_NORM_FILE); // write FORMAT + output.WriteInt(CURRENT_FORMAT); // write FORMAT output.WriteLong(++version); // every write changes // the index output.WriteInt(counter); // write counter @@ -354,7 +368,20 @@ } finally { - output.Close(); + try + { + output.Close(); + success = true; + } + finally + { + if (!success) + { + // Try not to leave a truncated segments_N file in + // the index: + directory.DeleteFile(segmentFileName); + } + } } try @@ -386,38 +413,44 @@ public override System.Object Clone() { - SegmentInfos sis = new SegmentInfos(); + return new SegmentInfos(this); + } - // Copy Fields. 
const and static fields are ignored - sis.counter = this.counter; - sis.version = this.version; - sis.generation = this.generation; - sis.lastGeneration = this.lastGeneration; + private SegmentInfos(SegmentInfos si) : base(si) + { + } - for (int i = 0; i < this.Count; i++) - { - sis.Add(((SegmentInfo)this[i]).Clone()); - } - return sis; - } + public SegmentInfos() + { + } /// version number when this SegmentInfos was generated. public long GetVersion() { return version; } + public long GetGeneration() + { + return generation; + } + public long GetLastGeneration() + { + return lastGeneration; + } /// Current version number from segments file. + /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error public static long ReadCurrentVersion(Directory directory) { - return (long) ((System.Int64) new AnonymousClassFindSegmentsFile1(directory).run()); + return (long) ((System.Int64) new AnonymousClassFindSegmentsFile1(directory).Run()); } /// If non-null, information about retries when loading /// the segments file will be printed to this. /// - public static void SetInfoStream(System.IO.TextWriter infoStream) + public static void SetInfoStream(System.IO.StreamWriter infoStream) { SegmentInfos.infoStream = infoStream; } @@ -438,7 +471,7 @@ defaultGenFileRetryCount = count; } - /// + /// /// public static int GetDefaultGenFileRetryCount() { @@ -453,7 +486,7 @@ defaultGenFileRetryPauseMsec = msec; } - /// + /// /// public static int GetDefaultGenFileRetryPauseMsec() { @@ -470,16 +503,16 @@ { defaultGenLookaheadCount = count; } - /// + /// /// public static int GetDefaultGenLookahedCount() { return defaultGenLookaheadCount; } - /// + /// /// - public static System.IO.TextWriter GetInfoStream() + public static System.IO.StreamWriter GetInfoStream() { return infoStream; } @@ -488,7 +521,7 @@ { if (infoStream != null) { - infoStream.WriteLine(SupportClass.ThreadClass.Current().Name + ": " + message); + infoStream.WriteLine("SIS [" + SupportClass.ThreadClass.Current().Name + "]: " + message); } } @@ -516,7 +549,7 @@ this.directory = directory; } - public System.Object run() + public System.Object Run() { System.String segmentFileName = null; long lastGen = - 1; @@ -539,116 +572,131 @@ // it. // We have three methods for determining the current - // generation. We try each in sequence. + // generation. We try the first two in parallel, and + // fall back to the third when necessary. while (true) { - // Method 1: list the directory and use the highest - // segments_N file. This method works well as long - // as there is no stale caching on the directory - // contents: - System.String[] files = null; - if (0 == method) { + + // Method 1: list the directory and use the highest + // segments_N file. 
This method works well as long + // as there is no stale caching on the directory + // contents (NOTE: NFS clients often have such stale + // caching): + System.String[] files = null; + + long genA = - 1; + if (directory != null) - { files = directory.List(); - } else { files = System.IO.Directory.GetFileSystemEntries(fileDirectory.FullName); - for (int i = 0; i < files.Length; i++) - { - System.IO.FileInfo fi = new System.IO.FileInfo(files[i]); - files[i] = fi.Name; - } - } - - gen = Lucene.Net.Index.SegmentInfos.GetCurrentSegmentGeneration(files); - - if (gen == - 1) - { - System.String s = ""; - for (int i = 0; i < files.Length; i++) - { - s += (" " + files[i]); - } - throw new System.IO.FileNotFoundException("no segments* file found: files:" + s); } - } - - // Method 2 (fallback if Method 1 isn't reliable): - // if the directory listing seems to be stale, then - // try loading the "segments.gen" file. - if (1 == method || (0 == method && lastGen == gen && retry)) - { - method = 1; + if (files != null) + genA = Lucene.Net.Index.SegmentInfos.GetCurrentSegmentGeneration(files); - for (int i = 0; i < Lucene.Net.Index.SegmentInfos.defaultGenFileRetryCount; i++) + Lucene.Net.Index.SegmentInfos.Message("directory listing genA=" + genA); + + // Method 2: open segments.gen and read its + // contents. Then we take the larger of the two + // gen's. This way, if either approach is hitting + // a stale cache (NFS) we have a better chance of + // getting the right generation. + long genB = - 1; + if (directory != null) { - IndexInput genInput = null; - try - { - genInput = directory.OpenInput(IndexFileNames.SEGMENTS_GEN); - } - catch (System.IO.IOException e) + for (int i = 0; i < Lucene.Net.Index.SegmentInfos.defaultGenFileRetryCount; i++) { - Lucene.Net.Index.SegmentInfos.Message("segments.gen open: IOException " + e); - } - if (genInput != null) - { - + IndexInput genInput = null; try { - int version = genInput.ReadInt(); - if (version == Lucene.Net.Index.SegmentInfos.FORMAT_LOCKLESS) + genInput = directory.OpenInput(IndexFileNames.SEGMENTS_GEN); + } + catch (System.IO.FileNotFoundException e) + { + Lucene.Net.Index.SegmentInfos.Message("segments.gen open: FileNotFoundException " + e); + break; + } + catch (System.IO.IOException e) + { + Lucene.Net.Index.SegmentInfos.Message("segments.gen open: IOException " + e); + } + + if (genInput != null) + { + try { - long gen0 = genInput.ReadLong(); - long gen1 = genInput.ReadLong(); - Lucene.Net.Index.SegmentInfos.Message("fallback check: " + gen0 + "; " + gen1); - if (gen0 == gen1) + int version = genInput.ReadInt(); + if (version == Lucene.Net.Index.SegmentInfos.FORMAT_LOCKLESS) { - // The file is consistent. - if (gen0 > gen) + long gen0 = genInput.ReadLong(); + long gen1 = genInput.ReadLong(); + Lucene.Net.Index.SegmentInfos.Message("fallback check: " + gen0 + "; " + gen1); + if (gen0 == gen1) { - Lucene.Net.Index.SegmentInfos.Message("fallback to '" + IndexFileNames.SEGMENTS_GEN + "' check: now try generation " + gen0 + " > " + gen); - gen = gen0; + // The file is consistent. 
+ genB = gen0; + break; } - break; } } + catch (System.IO.IOException err2) + { + // will retry + } + finally + { + genInput.Close(); + } } - catch (System.IO.IOException err2) + try { - // will retry + System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * Lucene.Net.Index.SegmentInfos.defaultGenFileRetryPauseMsec)); } - finally + catch (System.Threading.ThreadInterruptedException e) { - genInput.Close(); + // will retry } } - try - { - System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * Lucene.Net.Index.SegmentInfos.defaultGenFileRetryPauseMsec)); - } - catch (System.Threading.ThreadInterruptedException e) + } + + Lucene.Net.Index.SegmentInfos.Message(IndexFileNames.SEGMENTS_GEN + " check: genB=" + genB); + + // Pick the larger of the two gen's: + if (genA > genB) + gen = genA; + else + gen = genB; + + if (gen == - 1) + { + // Neither approach found a generation + System.String s; + if (files != null) { - // will retry + s = ""; + for (int i = 0; i < files.Length; i++) + s += (" " + files[i]); } + else + s = " null"; + throw new System.IO.FileNotFoundException("no segments* file found in " + directory + ": files:" + s); } } - // Method 3 (fallback if Methods 2 & 3 are not - // reliable): since both directory cache and file - // contents cache seem to be stale, just advance the - // generation. - if (2 == method || (1 == method && lastGen == gen && retry)) + // Third method (fallback if first & second methods + // are not reliable): since both directory cache and + // file contents cache seem to be stale, just + // advance the generation. + if (1 == method || (0 == method && lastGen == gen && retry)) { - method = 2; + method = 1; if (genLookaheadCount < Lucene.Net.Index.SegmentInfos.defaultGenLookaheadCount) { @@ -720,7 +768,20 @@ // try it if so: System.String prevSegmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen - 1); - if (directory.FileExists(prevSegmentFileName)) + bool prevExists; + if (directory != null) + prevExists = directory.FileExists(prevSegmentFileName); + else + { + bool tmpBool; + if (System.IO.File.Exists(new System.IO.FileInfo(fileDirectory.FullName + "\\" + prevSegmentFileName).FullName)) + tmpBool = true; + else + tmpBool = System.IO.Directory.Exists(new System.IO.FileInfo(fileDirectory.FullName + "\\" + prevSegmentFileName).FullName); + prevExists = tmpBool; + } + + if (prevExists) { Lucene.Net.Index.SegmentInfos.Message("fallback to prior segment file '" + prevSegmentFileName + "'"); try @@ -747,7 +808,19 @@ /// during the processing that could have been caused by /// a writer committing. /// - public abstract System.Object DoBody(System.String segmentFileName); + protected internal abstract System.Object DoBody(System.String segmentFileName); + } + + /// Returns a new SegmentInfos containg the SegmentInfo + /// instances in the specified range first (inclusive) to + /// last (exclusive), so total number of segments returned + /// is last-first. 
+ /// + public SegmentInfos Range(int first, int last) + { + SegmentInfos infos = new SegmentInfos(); + infos.AddRange((System.Collections.IList) ((System.Collections.ArrayList) this).GetRange(first, last - first)); + return infos; } } } \ No newline at end of file Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentMergeQueue.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentMergeQueue.cs?rev=671404&r1=671403&r2=671404&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentMergeQueue.cs (original) +++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentMergeQueue.cs Tue Jun 24 19:52:22 2008 @@ -16,6 +16,7 @@ */ using System; + using PriorityQueue = Lucene.Net.Util.PriorityQueue; namespace Lucene.Net.Index Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentMerger.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentMerger.cs?rev=671404&r1=671403&r2=671404&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentMerger.cs (original) +++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentMerger.cs Tue Jun 24 19:52:22 2008 @@ -16,11 +16,12 @@ */ using System; + using FieldSelector = Lucene.Net.Documents.FieldSelector; using FieldSelectorResult = Lucene.Net.Documents.FieldSelectorResult; using Directory = Lucene.Net.Store.Directory; +using IndexInput = Lucene.Net.Store.IndexInput; using IndexOutput = Lucene.Net.Store.IndexOutput; -using RAMOutputStream = Lucene.Net.Store.RAMOutputStream; namespace Lucene.Net.Index { @@ -33,12 +34,13 @@ /// /// /// - /// + /// /// - /// + /// /// - public sealed class SegmentMerger + sealed class SegmentMerger { + [Serializable] private class AnonymousClassFieldSelector : FieldSelector { public AnonymousClassFieldSelector(SegmentMerger enclosingInstance) @@ -69,7 +71,7 @@ } /// norms header placeholder - internal static readonly byte[] NORMS_HEADER = new byte[]{(byte) 'N', (byte) 'R', (byte) 'M', (byte) 255}; + internal static readonly byte[] NORMS_HEADER = new byte[]{(byte) 'N', (byte) 'R', (byte) 'M', unchecked((byte) -1)}; private Directory directory; private System.String segment; @@ -78,6 +80,21 @@ private System.Collections.ArrayList readers = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10)); private FieldInfos fieldInfos; + private int mergedDocs; + + private CheckAbort checkAbort; + + // Whether we should merge doc stores (stored fields and + // vectors files). When all segments we are merging + // already share the same doc store files, we don't need + // to merge the doc stores. + private bool mergeDocStores; + + /// Maximum number of contiguous documents to bulk-copy + /// when merging stored fields + /// + private const int MAX_RAW_MERGE_DOCS = 4192; + /// This ctor used only by test code. 
/// /// @@ -85,25 +102,27 @@ /// /// The name of the new segment /// - public SegmentMerger(Directory dir, System.String name) + internal SegmentMerger(Directory dir, System.String name) { InitBlock(); directory = dir; segment = name; } - internal SegmentMerger(IndexWriter writer, System.String name) + internal SegmentMerger(IndexWriter writer, System.String name, MergePolicy.OneMerge merge) { InitBlock(); directory = writer.GetDirectory(); segment = name; + if (merge != null) + checkAbort = new CheckAbort(merge, directory); termIndexInterval = writer.GetTermIndexInterval(); } /// Add an IndexReader to the collection of readers that are to be merged - /// reader + /// /// - public void Add(IndexReader reader) + internal void Add(IndexReader reader) { readers.Add(reader); } @@ -121,26 +140,50 @@ /// Merges the readers specified by the {@link #add} method into the directory passed to the constructor /// The number of documents that were merged /// - /// IOException - public int Merge() + /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error + internal int Merge() + { + return Merge(true); + } + + /// Merges the readers specified by the {@link #add} method + /// into the directory passed to the constructor. + /// + /// if false, we will not merge the + /// stored fields nor vectors files + /// + /// The number of documents that were merged + /// + /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error + internal int Merge(bool mergeDocStores) { - int value_Renamed; - value_Renamed = MergeFields(); + this.mergeDocStores = mergeDocStores; + + // NOTE: it's important to add calls to + // checkAbort.work(...) if you make any changes to this + // method that will spend alot of time. The frequency + // of this check impacts how long + // IndexWriter.close(false) takes to actually stop the + // threads. + + mergedDocs = MergeFields(); MergeTerms(); MergeNorms(); - if (fieldInfos.HasVectors()) + if (mergeDocStores && fieldInfos.HasVectors()) MergeVectors(); - return value_Renamed; + return mergedDocs; } /// close all IndexReaders that have been added. /// Should not be called before merge(). /// /// IOException - public void CloseReaders() + internal void CloseReaders() { for (int i = 0; i < readers.Count; i++) { @@ -150,16 +193,18 @@ } } - public System.Collections.ArrayList CreateCompoundFile(System.String fileName) + internal System.Collections.ArrayList CreateCompoundFile(System.String fileName) { - CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName); + CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName, checkAbort); System.Collections.ArrayList files = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(IndexFileNames.COMPOUND_EXTENSIONS.Length + 1)); // Basic files for (int i = 0; i < IndexFileNames.COMPOUND_EXTENSIONS.Length; i++) { - files.Add(segment + "." + IndexFileNames.COMPOUND_EXTENSIONS[i]); + System.String ext = IndexFileNames.COMPOUND_EXTENSIONS[i]; + if (mergeDocStores || (!ext.Equals(IndexFileNames.FIELDS_EXTENSION) && !ext.Equals(IndexFileNames.FIELDS_INDEX_EXTENSION))) + files.Add(segment + "." 
+ ext); } // Fieldable norm files @@ -174,7 +219,7 @@ } // Vector files - if (fieldInfos.HasVectors()) + if (fieldInfos.HasVectors() && mergeDocStores) { for (int i = 0; i < IndexFileNames.VECTOR_EXTENSIONS.Length; i++) { @@ -195,62 +240,169 @@ return files; } - private void AddIndexed(IndexReader reader, FieldInfos fieldInfos, System.Collections.ICollection names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector) + private void AddIndexed(IndexReader reader, FieldInfos fieldInfos, System.Collections.ICollection names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads) { System.Collections.IEnumerator i = names.GetEnumerator(); while (i.MoveNext()) { - System.Collections.DictionaryEntry e = (System.Collections.DictionaryEntry) i.Current; - System.String field = (System.String) e.Key; - fieldInfos.Add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.HasNorms(field)); + System.String field = (System.String) i.Current; + fieldInfos.Add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.HasNorms(field), storePayloads); } } /// /// The number of documents in all of the readers /// - /// IOException + /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error private int MergeFields() { - fieldInfos = new FieldInfos(); // merge field names - int docCount = 0; + + if (!mergeDocStores) + { + // When we are not merging by doc stores, that means + // all segments were written as part of a single + // autoCommit=false IndexWriter session, so their field + // name -> number mapping are the same. So, we start + // with the fieldInfos of the last segment in this + // case, to keep that numbering. 
+ SegmentReader sr = (SegmentReader) readers[readers.Count - 1]; + fieldInfos = (FieldInfos) sr.fieldInfos.Clone(); + } + else + { + fieldInfos = new FieldInfos(); // merge field names + } + for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader) readers[i]; - AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true); - AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false); - AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true); - AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false); - AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false); - fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false); + if (reader is SegmentReader) + { + SegmentReader segmentReader = (SegmentReader) reader; + for (int j = 0; j < segmentReader.GetFieldInfos().Size(); j++) + { + FieldInfo fi = segmentReader.GetFieldInfos().FieldInfo(j); + fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads); + } + } + else + { + AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false); + AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false); + AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false); + AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false); + AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true); + AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false); + fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false); + } } fieldInfos.Write(directory, segment + ".fnm"); - FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); - - // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're - // in merge mode, we use this FieldSelector - FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this); + int docCount = 0; - try + if (mergeDocStores) { + + // If the i'th reader is a SegmentReader and has + // identical fieldName -> number mapping, then this + // array will be non-null at position i: + SegmentReader[] matchingSegmentReaders = new SegmentReader[readers.Count]; + + // If this reader is a SegmentReader, and all of its + // field name -> number mappings match the "merged" + // FieldInfos, then we can do a bulk copy of the + // stored fields: for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader) readers[i]; - int maxDoc = reader.MaxDoc(); - for (int j = 0; j < maxDoc; j++) - if (!reader.IsDeleted(j)) + if (reader is SegmentReader) + { + SegmentReader segmentReader = (SegmentReader) reader; + bool same = true; + FieldInfos segmentFieldInfos = segmentReader.GetFieldInfos(); + for (int j = 0; same && j < segmentFieldInfos.Size(); j++) + same = fieldInfos.FieldName(j).Equals(segmentFieldInfos.FieldName(j)); + if (same) { - // skip 
deleted docs - fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge)); - docCount++; + matchingSegmentReaders[i] = segmentReader; } + } + } + + // Used for bulk-reading raw bytes for stored fields + int[] rawDocLengths = new int[MAX_RAW_MERGE_DOCS]; + + // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're + // in merge mode, we use this FieldSelector + FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this); + + // merge field values + FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); + + try + { + for (int i = 0; i < readers.Count; i++) + { + IndexReader reader = (IndexReader) readers[i]; + SegmentReader matchingSegmentReader = matchingSegmentReaders[i]; + FieldsReader matchingFieldsReader; + if (matchingSegmentReader != null) + matchingFieldsReader = matchingSegmentReader.GetFieldsReader(); + else + matchingFieldsReader = null; + int maxDoc = reader.MaxDoc(); + for (int j = 0; j < maxDoc; ) + { + if (!reader.IsDeleted(j)) + { + // skip deleted docs + if (matchingSegmentReader != null) + { + // We can optimize this case (doing a bulk + // byte copy) since the field numbers are + // identical + int start = j; + int numDocs = 0; + do + { + j++; + numDocs++; + } + while (j < maxDoc && !matchingSegmentReader.IsDeleted(j) && numDocs < MAX_RAW_MERGE_DOCS); + + IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs); + fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs); + docCount += numDocs; + if (checkAbort != null) + checkAbort.Work(300 * numDocs); + } + else + { + fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge)); + j++; + docCount++; + if (checkAbort != null) + checkAbort.Work(300); + } + } + else + j++; + } + } + } + finally + { + fieldsWriter.Close(); } } - finally - { - fieldsWriter.Close(); - } + // If we are skipping the doc stores, that means there + // are no deletions in any of these segments, so we + // just sum numDocs() of each segment to get total docCount + else + for (int i = 0; i < readers.Count; i++) + docCount += ((IndexReader) readers[i]).NumDocs(); + return docCount; } @@ -272,6 +424,8 @@ if (reader.IsDeleted(docNum)) continue; termVectorsWriter.AddAllDocVectors(reader.GetTermFreqVectors(docNum)); + if (checkAbort != null) + checkAbort.Work(300); } } } @@ -285,7 +439,9 @@ private IndexOutput proxOutput = null; private TermInfosWriter termInfosWriter = null; private int skipInterval; + private int maxSkipLevels; private SegmentMergeQueue queue = null; + private DefaultSkipListWriter skipListWriter = null; private void MergeTerms() { @@ -295,6 +451,8 @@ proxOutput = directory.CreateOutput(segment + ".prx"); termInfosWriter = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval); skipInterval = termInfosWriter.skipInterval; + maxSkipLevels = termInfosWriter.maxSkipLevels; + skipListWriter = new DefaultSkipListWriter(skipInterval, maxSkipLevels, mergedDocs, freqOutput, proxOutput); queue = new SegmentMergeQueue(readers.Count); MergeTermInfos(); @@ -343,7 +501,10 @@ top = (SegmentMergeInfo) queue.Top(); } - MergeTermInfo(match, matchSize); // add new TermInfo + int df = MergeTermInfo(match, matchSize); // add new TermInfo + + if (checkAbort != null) + checkAbort.Work(df / 3.0); while (matchSize > 0) { @@ -368,14 +529,16 @@ /// /// number of cells in the array actually occupied /// - private void MergeTermInfo(SegmentMergeInfo[] smis, int n) + /// CorruptIndexException if the index is corrupt + /// IOException 
if there is a low-level IO error + private int MergeTermInfo(SegmentMergeInfo[] smis, int n) { long freqPointer = freqOutput.GetFilePointer(); long proxPointer = proxOutput.GetFilePointer(); int df = AppendPostings(smis, n); // append posting data - long skipPointer = WriteSkip(); + long skipPointer = skipListWriter.WriteSkip(freqOutput); if (df > 0) { @@ -383,8 +546,12 @@ termInfo.Set(df, freqPointer, proxPointer, (int) (skipPointer - freqPointer)); termInfosWriter.Add(smis[0].term, termInfo); } + + return df; } + private byte[] payloadBuffer = null; + /// Process postings from multiple segments all positioned on the /// same term. Writes out merged entries into freqOutput and /// the proxOutput streams. @@ -396,15 +563,20 @@ /// /// number of documents across all segments where this term was found /// + /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error private int AppendPostings(SegmentMergeInfo[] smis, int n) { int lastDoc = 0; int df = 0; // number of docs w/ term - ResetSkip(); + skipListWriter.ResetSkip(); + bool storePayloads = fieldInfos.FieldInfo(smis[0].term.field).storePayloads; + int lastPayloadLength = - 1; // ensures that we write the first length for (int i = 0; i < n; i++) { SegmentMergeInfo smi = smis[i]; TermPositions postings = smi.GetPositions(); + System.Diagnostics.Debug.Assert(postings != null); int base_Renamed = smi.base_Renamed; int[] docMap = smi.GetDocMap(); postings.Seek(smi.termEnum); @@ -416,13 +588,14 @@ doc += base_Renamed; // convert to merged space if (doc < 0 || (df > 0 && doc <= lastDoc)) - throw new System.SystemException("docs out of order (" + doc + " <= " + lastDoc + " )"); + throw new CorruptIndexException("docs out of order (" + doc + " <= " + lastDoc + " )"); df++; if ((df % skipInterval) == 0) { - BufferSkip(lastDoc); + skipListWriter.SetSkipData(lastDoc, storePayloads, lastPayloadLength); + skipListWriter.BufferSkip(df); } int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1 @@ -439,11 +612,41 @@ freqOutput.WriteVInt(freq); // write frequency in doc } + /** See {@link DocumentWriter#writePostings(Posting[], String) for + * documentation about the encoding of positions and payloads + */ int lastPosition = 0; // write position deltas for (int j = 0; j < freq; j++) { int position = postings.NextPosition(); - proxOutput.WriteVInt(position - lastPosition); + int delta = position - lastPosition; + if (storePayloads) + { + int payloadLength = postings.GetPayloadLength(); + if (payloadLength == lastPayloadLength) + { + proxOutput.WriteVInt(delta * 2); + } + else + { + proxOutput.WriteVInt(delta * 2 + 1); + proxOutput.WriteVInt(payloadLength); + lastPayloadLength = payloadLength; + } + if (payloadLength > 0) + { + if (payloadBuffer == null || payloadBuffer.Length < payloadLength) + { + payloadBuffer = new byte[payloadLength]; + } + postings.GetPayload(payloadBuffer, 0); + proxOutput.WriteBytes(payloadBuffer, 0, payloadLength); + } + } + else + { + proxOutput.WriteVInt(delta); + } lastPosition = position; } } @@ -451,40 +654,6 @@ return df; } - private RAMOutputStream skipBuffer = new RAMOutputStream(); - private int lastSkipDoc; - private long lastSkipFreqPointer; - private long lastSkipProxPointer; - - private void ResetSkip() - { - skipBuffer.Reset(); - lastSkipDoc = 0; - lastSkipFreqPointer = freqOutput.GetFilePointer(); - lastSkipProxPointer = proxOutput.GetFilePointer(); - } - - private void BufferSkip(int doc) - { - long freqPointer = freqOutput.GetFilePointer(); - long proxPointer = 
proxOutput.GetFilePointer(); - - skipBuffer.WriteVInt(doc - lastSkipDoc); - skipBuffer.WriteVInt((int) (freqPointer - lastSkipFreqPointer)); - skipBuffer.WriteVInt((int) (proxPointer - lastSkipProxPointer)); - - lastSkipDoc = doc; - lastSkipFreqPointer = freqPointer; - lastSkipProxPointer = proxPointer; - } - - private long WriteSkip() - { - long skipPointer = freqOutput.GetFilePointer(); - skipBuffer.WriteTo(freqOutput); - return skipPointer; - } - private void MergeNorms() { byte[] normBuffer = null; @@ -528,6 +697,8 @@ } } } + if (checkAbort != null) + checkAbort.Work(maxDoc); } } } @@ -540,5 +711,34 @@ } } } + + internal sealed class CheckAbort + { + private double workCount; + private MergePolicy.OneMerge merge; + private Directory dir; + public CheckAbort(MergePolicy.OneMerge merge, Directory dir) + { + this.merge = merge; + this.dir = dir; + } + + /// Records the fact that roughly units amount of work + /// have been done since this method was last called. + /// When adding time-consuming code into SegmentMerger, + /// you should test different values for units to ensure + /// that the time in between calls to merge.checkAborted + /// is up to ~ 1 second. + /// + public void Work(double units) + { + workCount += units; + if (workCount >= 10000.0) + { + merge.CheckAborted(dir); + workCount = 0; + } + } + } } } \ No newline at end of file Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentReade-2r.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentReade-2r.cs?rev=671404&view=auto ============================================================================== --- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentReade-2r.cs (added) +++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentReade-2r.cs Tue Jun 24 19:52:22 2008 @@ -0,0 +1,755 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; +using Document = Lucene.Net.Documents.Document; +using FieldSelector = Lucene.Net.Documents.FieldSelector; +using DefaultSimilarity = Lucene.Net.Search.DefaultSimilarity; +using Directory = Lucene.Net.Store.Directory; +using IndexInput = Lucene.Net.Store.IndexInput; +using IndexOutput = Lucene.Net.Store.IndexOutput; +using BitVector = Lucene.Net.Util.BitVector; + +namespace Lucene.Net.Index +{ + + /// $Id: SegmentReader.java 496851 2007-01-16 20:24:52Z mikemccand $ + /// + public class SegmentReader : IndexReader + { + private System.String segment; + private SegmentInfo si; + + internal FieldInfos fieldInfos; + private FieldsReader fieldsReader; + + internal TermInfosReader tis; + internal TermVectorsReader termVectorsReaderOrig = null; + internal System.LocalDataStoreSlot termVectorsLocal = System.Threading.Thread.AllocateDataSlot(); + + internal BitVector deletedDocs = null; + private bool deletedDocsDirty = false; + private bool normsDirty = false; + private bool undeleteAll = false; + + private bool rollbackDeletedDocsDirty = false; + private bool rollbackNormsDirty = false; + private bool rollbackUndeleteAll = false; + + internal IndexInput freqStream; + internal IndexInput proxStream; + + // Compound File Reader when based on a compound file segment + internal CompoundFileReader cfsReader = null; + + public FieldInfos FieldInfos + { + get { return fieldInfos; } + } + + public IndexInput ProxStream + { + get { return proxStream; } + set { proxStream = value; } + } + + private class Norm + { + private void InitBlock(SegmentReader enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private SegmentReader enclosingInstance; + public SegmentReader Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + public Norm(SegmentReader enclosingInstance, IndexInput in_Renamed, int number, long normSeek) + { + InitBlock(enclosingInstance); + this.in_Renamed = in_Renamed; + this.number = number; + this.normSeek = normSeek; + } + + internal IndexInput in_Renamed; + internal byte[] bytes; + internal bool dirty; + internal int number; + internal long normSeek; + internal bool rollbackDirty; + + internal void ReWrite(SegmentInfo si) + { + // NOTE: norms are re-written in regular directory, not cfs + + System.String oldFileName = si.GetNormFileName(this.number); + if (oldFileName != null && !oldFileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION)) + { + // Mark this file for deletion. Note that we don't + // actually try to delete it until the new segments files is + // successfully written: + Enclosing_Instance.deleter.AddPendingFile(oldFileName); + } + + si.AdvanceNormGen(this.number); + IndexOutput out_Renamed = Enclosing_Instance.Directory().CreateOutput(si.GetNormFileName(this.number)); + try + { + out_Renamed.WriteBytes(bytes, Enclosing_Instance.MaxDoc()); + } + finally + { + out_Renamed.Close(); + } + this.dirty = false; + } + } + + private System.Collections.Hashtable norms = System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable()); + + /// The class which implements SegmentReader. 
+ private static System.Type IMPL; + + public SegmentReader() : base(null) + { + } + + public static SegmentReader Get(SegmentInfo si) + { + return Get(si.dir, si, null, false, false); + } + + public static SegmentReader Get(SegmentInfos sis, SegmentInfo si, bool closeDir) + { + return Get(si.dir, si, sis, closeDir, true); + } + + public static SegmentReader Get(Directory dir, SegmentInfo si, SegmentInfos sis, bool closeDir, bool ownDir) + { + SegmentReader instance; + try + { + instance = (SegmentReader) System.Activator.CreateInstance(IMPL); + } + catch (System.Exception e) + { + throw new System.SystemException("cannot load SegmentReader class: " + e, e); + } + instance.Init(dir, sis, closeDir, ownDir); + instance.Initialize(si); + return instance; + } + + private void Initialize(SegmentInfo si) + { + segment = si.name; + this.si = si; + + bool success = false; + + try + { + // Use compound file directory for some files, if it exists + Directory cfsDir = Directory(); + if (si.GetUseCompoundFile()) + { + cfsReader = new CompoundFileReader(Directory(), segment + ".cfs"); + cfsDir = cfsReader; + } + + // No compound file exists - use the multi-file format + fieldInfos = new FieldInfos(cfsDir, segment + ".fnm"); + fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos); + + // Verify two sources of "maxDoc" agree: + if (fieldsReader.Size() != si.docCount) + { + throw new System.SystemException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount); + } + + tis = new TermInfosReader(cfsDir, segment, fieldInfos); + + // NOTE: the bitvector is stored using the regular directory, not cfs + if (HasDeletions(si)) + { + deletedDocs = new BitVector(Directory(), si.GetDelFileName()); + + // Verify # deletes does not exceed maxDoc for this segment: + if (deletedDocs.Count() > MaxDoc()) + { + throw new System.SystemException("number of deletes (" + deletedDocs.Count() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name); + } + } + + // make sure that all index files have been read or are kept open + // so that if an index update removes them we'll still have them + freqStream = cfsDir.OpenInput(segment + ".frq"); + proxStream = cfsDir.OpenInput(segment + ".prx"); + OpenNorms(cfsDir); + + if (fieldInfos.HasVectors()) + { + // open term vector files only as needed + termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos); + } + success = true; + } + finally + { + + // With lock-less commits, it's entirely possible (and + // fine) to hit a FileNotFound exception above. In + // this case, we want to explicitly close any subset + // of things that were opened so that we don't have to + // wait for a GC to do so. + if (!success) + { + DoClose(); + } + } + } + + protected internal override void DoCommit() + { + if (deletedDocsDirty) + { + // re-write deleted + System.String oldDelFileName = si.GetDelFileName(); + if (oldDelFileName != null) + { + // Mark this file for deletion. 
Note that we don't + // actually try to delete it until the new segments files is + // successfully written: + deleter.AddPendingFile(oldDelFileName); + } + + si.AdvanceDelGen(); + + // We can write directly to the actual name (vs to a + // .tmp & renaming it) because the file is not live + // until segments file is written: + deletedDocs.Write(Directory(), si.GetDelFileName()); + } + if (undeleteAll && si.HasDeletions()) + { + System.String oldDelFileName = si.GetDelFileName(); + if (oldDelFileName != null) + { + // Mark this file for deletion. Note that we don't + // actually try to delete it until the new segments files is + // successfully written: + deleter.AddPendingFile(oldDelFileName); + } + si.ClearDelGen(); + } + if (normsDirty) + { + // re-write norms + si.SetNumFields(fieldInfos.Size()); + System.Collections.IEnumerator values = norms.Values.GetEnumerator(); + while (values.MoveNext()) + { + Norm norm = (Norm) values.Current; + if (norm.dirty) + { + norm.ReWrite(si); + } + } + } + deletedDocsDirty = false; + normsDirty = false; + undeleteAll = false; + } + + protected internal override void DoClose() + { + if (fieldsReader != null) + { + fieldsReader.Close(); + } + if (tis != null) + { + tis.Close(); + } + + if (freqStream != null) + freqStream.Close(); + if (proxStream != null) + proxStream.Close(); + + CloseNorms(); + + if (termVectorsReaderOrig != null) + termVectorsReaderOrig.Close(); + + if (cfsReader != null) + cfsReader.Close(); + } + + internal static bool HasDeletions(SegmentInfo si) + { + return si.HasDeletions(); + } + + public override bool HasDeletions() + { + return deletedDocs != null; + } + + internal static bool UsesCompoundFile(SegmentInfo si) + { + return si.GetUseCompoundFile(); + } + + internal static bool HasSeparateNorms(SegmentInfo si) + { + return si.HasSeparateNorms(); + } + + protected internal override void DoDelete(int docNum) + { + if (deletedDocs == null) + deletedDocs = new BitVector(MaxDoc()); + deletedDocsDirty = true; + undeleteAll = false; + deletedDocs.Set(docNum); + } + + protected internal override void DoUndeleteAll() + { + deletedDocs = null; + deletedDocsDirty = false; + undeleteAll = true; + } + + internal virtual System.Collections.ArrayList Files() + { + System.Collections.ArrayList files = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(16)); + + if (si.GetUseCompoundFile()) + { + System.String name = segment + ".cfs"; + if (Directory().FileExists(name)) + { + files.Add(name); + } + } + else + { + for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.Length; i++) + { + System.String name = segment + "." + IndexFileNames.INDEX_EXTENSIONS[i]; + if (Directory().FileExists(name)) + files.Add(name); + } + } + + if (si.HasDeletions()) + { + files.Add(si.GetDelFileName()); + } + + bool addedNrm = false; + for (int i = 0; i < fieldInfos.Size(); i++) + { + System.String name = si.GetNormFileName(i); + if (name != null && Directory().FileExists(name)) + { + if (name.EndsWith("." 
+ IndexFileNames.NORMS_EXTENSION)) + { + if (addedNrm) + continue; // add .nrm just once + addedNrm = true; + } + files.Add(name); + } + } + return files; + } + + public override TermEnum Terms() + { + return tis.Terms(); + } + + public override TermEnum Terms(Term t) + { + return tis.Terms(t); + } + + public override Document Document(int n, FieldSelector fieldSelector) + { + lock (this) + { + if (IsDeleted(n)) + throw new System.ArgumentException("attempt to access a deleted document"); + return fieldsReader.Doc(n, fieldSelector); + } + } + + public override bool IsDeleted(int n) + { + lock (this) + { + return (deletedDocs != null && deletedDocs.Get(n)); + } + } + + public override TermDocs TermDocs() + { + return new SegmentTermDocs(this); + } + + public override TermPositions TermPositions() + { + return new SegmentTermPositions(this); + } + + public override int DocFreq(Term t) + { + TermInfo ti = tis.Get(t); + if (ti != null) + return ti.docFreq; + else + return 0; + } + + public override int NumDocs() + { + int n = MaxDoc(); + if (deletedDocs != null) + n -= deletedDocs.Count(); + return n; + } + + public override int MaxDoc() + { + return si.docCount; + } + + /// + /// + public override System.Collections.ICollection GetFieldNames(IndexReader.FieldOption fieldOption) + { + + System.Collections.Hashtable fieldSet = new System.Collections.Hashtable(); + for (int i = 0; i < fieldInfos.Size(); i++) + { + FieldInfo fi = fieldInfos.FieldInfo(i); + if (fieldOption == IndexReader.FieldOption.ALL) + { + fieldSet.Add(fi.name, fi.name); + } + else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) + { + fieldSet.Add(fi.name, fi.name); + } + else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED) + { + fieldSet.Add(fi.name, fi.name); + } + else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) + { + fieldSet.Add(fi.name, fi.name); + } + else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR) + { + fieldSet.Add(fi.name, fi.name); + } + else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) + { + fieldSet.Add(fi.name, fi.name); + } + else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) + { + fieldSet.Add(fi.name, fi.name); + } + else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) + { + fieldSet.Add(fi.name, fi.name); + } + else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) + { + fieldSet.Add(fi.name, fi.name); + } + } + return fieldSet; + } + + + public override bool HasNorms(System.String field) + { + lock (this) + { + return norms.ContainsKey(field); + } + } + + internal static byte[] CreateFakeNorms(int size) + { + byte[] ones = new byte[size]; + byte val = DefaultSimilarity.EncodeNorm(1.0f); + for (int index = 0; index < size; index++) + ones[index] = val; + return ones; + } + + private byte[] ones; + private byte[] FakeNorms() + { + if (ones == null) + ones = CreateFakeNorms(MaxDoc()); + return ones; + } + + // can return null if norms aren't stored + protected internal virtual byte[] GetNorms(System.String field) + { + lock (this) + { + 
Norm norm = (Norm) norms[field]; + if (norm == null) + return null; // not indexed, or norms not stored + if (norm.bytes == null) + { + // value not yet read + byte[] bytes = new byte[MaxDoc()]; + Norms(field, bytes, 0); + norm.bytes = bytes; // cache it + } + return norm.bytes; + } + } + + // returns fake norms if norms aren't available + public override byte[] Norms(System.String field) + { + lock (this) + { + byte[] bytes = GetNorms(field); + if (bytes == null) + bytes = FakeNorms(); + return bytes; + } + } + + protected internal override void DoSetNorm(int doc, System.String field, byte value_Renamed) + { + Norm norm = (Norm) norms[field]; + if (norm == null) + // not an indexed field + return ; + norm.dirty = true; // mark it dirty + normsDirty = true; + + Norms(field)[doc] = value_Renamed; // set the value + } + + /// Read norms into a pre-allocated array. + public override void Norms(System.String field, byte[] bytes, int offset) + { + lock (this) + { + + Norm norm = (Norm) norms[field]; + if (norm == null) + { + Array.Copy(FakeNorms(), 0, bytes, offset, MaxDoc()); + return ; + } + + if (norm.bytes != null) + { + // can copy from cache + Array.Copy(norm.bytes, 0, bytes, offset, MaxDoc()); + return ; + } + + IndexInput normStream = (IndexInput) norm.in_Renamed.Clone(); + try + { + // read from disk + normStream.Seek(norm.normSeek); + normStream.ReadBytes(bytes, offset, MaxDoc()); + } + finally + { + normStream.Close(); + } + } + } + + + private void OpenNorms(Directory cfsDir) + { + long nextNormSeek = SegmentMerger.NORMS_HEADER.Length; //skip header (header unused for now) + int maxDoc = MaxDoc(); + for (int i = 0; i < fieldInfos.Size(); i++) + { + FieldInfo fi = fieldInfos.FieldInfo(i); + if (fi.isIndexed && !fi.omitNorms) + { + Directory d = Directory(); + System.String fileName = si.GetNormFileName(fi.number); + if (!si.HasSeparateNorms(fi.number)) + { + d = cfsDir; + } + long normSeek = (fileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION)?nextNormSeek:0); + norms[fi.name] = new Norm(this, d.OpenInput(fileName), fi.number, normSeek); + nextNormSeek += maxDoc; // increment also if some norms are separate + } + } + } + + private void CloseNorms() + { + lock (norms.SyncRoot) + { + System.Collections.IEnumerator enumerator = norms.Values.GetEnumerator(); + while (enumerator.MoveNext()) + { + Norm norm = (Norm) enumerator.Current; + norm.in_Renamed.Close(); + } + } + } + + /// Create a clone from the initial TermVectorsReader and store it in the ThreadLocal. + /// TermVectorsReader + /// + private TermVectorsReader GetTermVectorsReader() + { + TermVectorsReader tvReader = (TermVectorsReader) System.Threading.Thread.GetData(termVectorsLocal); + if (tvReader == null) + { + tvReader = (TermVectorsReader) termVectorsReaderOrig.Clone(); + System.Threading.Thread.SetData(termVectorsLocal, tvReader); + } + return tvReader; + } + + /// Return a term frequency vector for the specified document and field. The + /// vector returned contains term numbers and frequencies for all terms in + /// the specified field of this document, if the field had storeTermVector + /// flag set. If the flag was not set, the method returns null. 
+ /// + /// IOException + public override TermFreqVector GetTermFreqVector(int docNumber, System.String field) + { + // Check if this field is invalid or has no stored term vector + FieldInfo fi = fieldInfos.FieldInfo(field); + if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null) + return null; + + TermVectorsReader termVectorsReader = GetTermVectorsReader(); + if (termVectorsReader == null) + return null; + + return termVectorsReader.Get(docNumber, field); + } + + + /// Return an array of term frequency vectors for the specified document. + /// The array contains a vector for each vectorized field in the document. + /// Each vector vector contains term numbers and frequencies for all terms + /// in a given vectorized field. + /// If no such fields existed, the method returns null. + /// + /// IOException + public override TermFreqVector[] GetTermFreqVectors(int docNumber) + { + if (termVectorsReaderOrig == null) + return null; + + TermVectorsReader termVectorsReader = GetTermVectorsReader(); + if (termVectorsReader == null) + return null; + + return termVectorsReader.Get(docNumber); + } + + /// Return the name of the segment this reader is reading. + internal virtual System.String GetSegmentName() + { + return segment; + } + + internal virtual void SetSegmentInfo(SegmentInfo info) + { + si = info; + } + + internal override void StartCommit() + { + base.StartCommit(); + rollbackDeletedDocsDirty = deletedDocsDirty; + rollbackNormsDirty = normsDirty; + rollbackUndeleteAll = undeleteAll; + System.Collections.IEnumerator values = norms.Values.GetEnumerator(); + while (values.MoveNext()) + { + Norm norm = (Norm) values.Current; + norm.rollbackDirty = norm.dirty; + } + } + + internal override void RollbackCommit() + { + base.RollbackCommit(); + deletedDocsDirty = rollbackDeletedDocsDirty; + normsDirty = rollbackNormsDirty; + undeleteAll = rollbackUndeleteAll; + System.Collections.IEnumerator values = norms.Values.GetEnumerator(); + while (values.MoveNext()) + { + Norm norm = (Norm) values.Current; + norm.dirty = norm.rollbackDirty; + } + } + static SegmentReader() + { + { + try + { + System.String name = SupportClass.AppSettings.Get("Lucene.Net.SegmentReader.class", typeof(SegmentReader).FullName); + IMPL = System.Type.GetType(name); + } + catch (System.Security.SecurityException se) + { + try + { + IMPL = System.Type.GetType(typeof(SegmentReader).FullName); + } + catch (System.Exception e) + { + throw new System.SystemException("cannot load default SegmentReader class: " + e, e); + } + } + catch (System.Exception e) + { + throw new System.SystemException("cannot load SegmentReader class: " + e, e); + } + } + } + } +} \ No newline at end of file
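
A note on one recurring change in the SegmentInfos.cs hunks above: version = System.DateTime.Now.Millisecond (which only yields the 0-999 millisecond component of the current time) is replaced by version = (System.DateTime.Now.Ticks - 621355968000000000) / 10000, i.e. milliseconds since the Unix epoch, approximating Java's System.currentTimeMillis() for generating unique index version numbers. A minimal standalone sketch of that conversion follows; the class and method names are illustrative only and are not part of the commit:

using System;

internal static class EpochMillis
{
    // .NET ticks are 100-nanosecond intervals counted from 0001-01-01 00:00:00.
    // 621355968000000000 is the tick count at 1970-01-01 00:00:00 (the Unix epoch),
    // so subtracting it and dividing by 10,000 (ticks per millisecond) gives
    // milliseconds since the epoch.
    private const long UnixEpochTicks = 621355968000000000L;
    private const long TicksPerMillisecond = 10000L;

    public static long Now()
    {
        // The ported code uses DateTime.Now (local time); DateTime.UtcNow would
        // match Java's System.currentTimeMillis() exactly, but for producing
        // ever-increasing, roughly unique version numbers either is sufficient.
        return (DateTime.Now.Ticks - UnixEpochTicks) / TicksPerMillisecond;
    }
}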