Subject: svn commit: r756927 [1/4] - in /incubator/lucene.net/trunk/C#/src/Lucene.Net: ./ Analysis/Standard/ Index/ Store/
Date: Sat, 21 Mar 2009 12:51:45 -0000
To: lucene-net-commits@incubator.apache.org
From: digy@apache.org

Author: digy
Date: Sat Mar 21 12:51:41 2009
New Revision: 756927

URL: http://svn.apache.org/viewvc?rev=756927&view=rev
Log: LUCENENET-164 upgrade from version 2.3.1
to 2.3.2 (Lucene.Net Core) Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Lucene.Net-2.3.2-VS2005.csproj incubator/lucene.net/trunk/C#/src/Lucene.Net/Lucene.Net-2.3.2-VS2005.sln incubator/lucene.net/trunk/C#/src/Lucene.Net/Lucene.Net-2.3.2.ndoc Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.cs incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.jflex incubator/lucene.net/trunk/C#/src/Lucene.Net/AssemblyInfo.cs incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/CheckIndex.cs incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/DocumentsWriter.cs incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexModifier.cs incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexWriter.cs incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/LogMergePolicy.cs incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentMerger.cs incubator/lucene.net/trunk/C#/src/Lucene.Net/Lucene.Net.xml incubator/lucene.net/trunk/C#/src/Lucene.Net/Store/BufferedIndexInput.cs incubator/lucene.net/trunk/C#/src/Lucene.Net/Store/FSDirectory.cs Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs?rev=756927&r1=756926&r2=756927&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs (original) +++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs Sat Mar 21 12:51:41 2009 @@ -42,13 +42,34 @@ public class StandardTokenizer : Tokenizer { - private void InitBlock() - { - maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH; - } - /// A private instance of the JFlex-constructed scanner - private StandardTokenizerImpl scanner; - + private void InitBlock() + { + maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH; + } + + /// A private instance of the JFlex-constructed scanner + private StandardTokenizerImpl scanner; + + public const int ALPHANUM = 0; + public const int APOSTROPHE = 1; + public const int ACRONYM = 2; + public const int COMPANY = 3; + public const int EMAIL = 4; + public const int HOST = 5; + public const int NUM = 6; + public const int CJ = 7; + + /// this solves a bug where HOSTs that end with '.' are identified + /// as ACRONYMs. It is deprecated and will be removed in the next + /// release. + /// + public const int ACRONYM_DEP = 8; + + public static readonly System.String[] TOKEN_TYPES = new System.String[] { "", "", "", "", "", "", "", "", "" }; + + /** @deprecated Please use {@link #TOKEN_TYPES} instead */ + public static readonly String[] tokenImage = TOKEN_TYPES; + /// Specifies whether deprecated acronyms should be replaced with HOST type. /// This is false by default to support backward compatibility. ///

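[Editor's note, not part of r756927] In the StandardTokenizer.cs hunk above, the token-type codes (ALPHANUM through CJ, plus the deprecated ACRONYM_DEP) and the TOKEN_TYPES name table move onto StandardTokenizer itself; the hunks that follow make the generated scanner and the JFlex grammar alias those definitions instead of repeating the literals. The TOKEN_TYPES entries render as empty strings here only because the archive stripped the angle-bracketed labels (such as "<ALPHANUM>") as markup. A small, hypothetical usage sketch of the single source of truth this establishes:

// Illustrative sketch only (not part of this commit). With the codes and the
// name table defined once on StandardTokenizer, the generated scanner and any
// user code resolve a token's type the same way instead of duplicating literals.
using System;
using Lucene.Net.Analysis.Standard;

class TokenTypeExample
{
    static void Main()
    {
        int type = StandardTokenizer.HOST;                  // numeric code a scanner would report
        string label = StandardTokenizer.TOKEN_TYPES[type]; // its display name from the shared table
        Console.WriteLine("token type {0} -> {1}", type, label);
    }
}
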
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.cs?rev=756927&r1=756926&r2=756927&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.cs (original) +++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.cs Sat Mar 21 12:51:41 2009 @@ -223,21 +223,21 @@ /* user code: */ - public const int ALPHANUM = 0; - public const int APOSTROPHE = 1; - public const int ACRONYM = 2; - public const int COMPANY = 3; - public const int EMAIL = 4; - public const int HOST = 5; - public const int NUM = 6; - public const int CJ = 7; + public const int ALPHANUM = StandardTokenizer.ALPHANUM; + public const int APOSTROPHE = StandardTokenizer.APOSTROPHE; + public const int ACRONYM = StandardTokenizer.ACRONYM; + public const int COMPANY = StandardTokenizer.COMPANY; + public const int EMAIL = StandardTokenizer.EMAIL; + public const int HOST = StandardTokenizer.HOST; + public const int NUM = StandardTokenizer.NUM; + public const int CJ = StandardTokenizer.CJ; /// this solves a bug where HOSTs that end with '.' are identified /// as ACRONYMs. It is deprecated and will be removed in the next /// release. /// - public const int ACRONYM_DEP = 8; - - public static readonly System.String[] TOKEN_TYPES = new System.String[]{"", "", "", "", "", "", "", "", ""}; + public const int ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP; + + public static readonly System.String[] TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES; public int Yychar() { Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.jflex URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.jflex?rev=756927&r1=756926&r2=756927&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.jflex (original) +++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.jflex Sat Mar 21 12:51:41 2009 @@ -30,32 +30,22 @@ %{ -public static final int ALPHANUM = 0; -public static final int APOSTROPHE = 1; -public static final int ACRONYM = 2; -public static final int COMPANY = 3; -public static final int EMAIL = 4; -public static final int HOST = 5; -public static final int NUM = 6; -public static final int CJ = 7; +public static final int ALPHANUM = StandardTokenizer.ALPHANUM; +public static final int APOSTROPHE = StandardTokenizer.APOSTROPHE; +public static final int ACRONYM = StandardTokenizer.ACRONYM; +public static final int COMPANY = StandardTokenizer.COMPANY; +public static final int EMAIL = StandardTokenizer.EMAIL; +public static final int HOST = StandardTokenizer.HOST; +public static final int NUM = StandardTokenizer.NUM; +public static final int CJ = StandardTokenizer.CJ; /** * @deprecated this solves a bug where HOSTs that end with '.' are identified * as ACRONYMs. It is deprecated and will be removed in the next * release. 
*/ -public static final int ACRONYM_DEP = 8; +public static final int ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP; -public static final String [] TOKEN_TYPES = new String [] { - "", - "", - "", - "", - "", - "", - "", - "", - "" -}; +public static final String [] TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES; public final int yychar() { Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/AssemblyInfo.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/AssemblyInfo.cs?rev=756927&r1=756926&r2=756927&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Lucene.Net/AssemblyInfo.cs (original) +++ incubator/lucene.net/trunk/C#/src/Lucene.Net/AssemblyInfo.cs Sat Mar 21 12:51:41 2009 @@ -33,7 +33,7 @@ [assembly: AssemblyDefaultAlias("Lucene.Net")] [assembly: AssemblyCulture("")] -[assembly: AssemblyInformationalVersionAttribute("2.3.1")] +[assembly: AssemblyInformationalVersionAttribute("2.3.2")] // @@ -47,7 +47,7 @@ // You can specify all the values or you can default the Revision and Build Numbers // by using the '*' as shown below: -[assembly: AssemblyVersion("2.3.1.003")] +[assembly: AssemblyVersion("2.3.1.001")] // Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/CheckIndex.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/CheckIndex.cs?rev=756927&r1=756926&r2=756927&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/CheckIndex.cs (original) +++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/CheckIndex.cs Sat Mar 21 12:51:41 2009 @@ -230,11 +230,11 @@ for (int j = 0; j < freq; j++) { int pos = termPositions.NextPosition(); - if (pos < 0) + if (pos < -1) { throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds"); } - if (pos <= lastPos) + if (pos < lastPos) { throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos); } @@ -375,7 +375,15 @@ return false; } - + + static bool assertsOn; + + private static bool TestAsserts() + { + assertsOn = true; + return true; + } + [STAThread] public static void Main(System.String[] args) { @@ -393,7 +401,11 @@ out_Renamed.WriteLine("\nUsage: java Lucene.Net.Index.CheckIndex pathToIndex [-fix]\n" + "\n" + " -fix: actually write a new segments_N file, removing any problematic segments\n" + "\n" + "**WARNING**: -fix should only be used on an emergency basis as it will cause\n" + "documents (perhaps many) to be permanently removed from the index. Always make\n" + "a backup copy of your index before running this! Do not run this tool on an index\n" + "that is actively being written to. You have been warned!\n" + "\n" + "Run without -fix, this tool will open the index, report version information\n" + "and report any exceptions it hits and what action it would take if -fix were\n" + "specified. With -fix, this tool will remove any segments that have issues and\n" + "write a new segments_N file. 
This means all documents contained in the affected\n" + "segments will be removed.\n" + "\n" + "This tool exits with exit code 1 if the index cannot be opened or has has any\n" + "corrup tion, else 0.\n"); System.Environment.Exit(1); } - + + System.Diagnostics.Debug.Assert(TestAsserts()); + if (!assertsOn) + System.Console.WriteLine("\nNote: testing will be more thorough if you run with System.Diagnostic.Debug.Assert() enabled."); + System.String dirName = args[0]; out_Renamed.WriteLine("\nOpening index @ " + dirName + "\n"); Directory dir = null; Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/DocumentsWriter.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/DocumentsWriter.cs?rev=756927&r1=756926&r2=756927&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/DocumentsWriter.cs (original) +++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/DocumentsWriter.cs Sat Mar 21 12:51:41 2009 @@ -297,6 +297,9 @@ tvf.Close(); tvd.Close(); tvx = null; + System.Diagnostics.Debug.Assert(4 + numDocsInStore * 8 == directory.FileLength(docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION), + "after flush: tvx size mismatch: " + numDocsInStore + " docs vs " + directory.FileLength(docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION) + + " length in bytes of " + docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION); } if (fieldsWriter != null) @@ -304,7 +307,10 @@ System.Diagnostics.Debug.Assert(docStoreSegment != null); fieldsWriter.Close(); fieldsWriter = null; - } + System.Diagnostics.Debug.Assert(numDocsInStore * 8 == directory.FileLength(docStoreSegment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION), + "after flush: fdx size mismatch: " + numDocsInStore + " docs vs " + directory.FileLength(docStoreSegment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION) + + " length in bytes of " + docStoreSegment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION); + } System.String s = docStoreSegment; docStoreSegment = null; @@ -717,8 +723,8 @@ { this.enclosingInstance = enclosingInstance; allFieldDataArray = new FieldData[10]; - postingsPool = new ByteBlockPool(enclosingInstance); - vectorsPool = new ByteBlockPool(enclosingInstance); + postingsPool = new ByteBlockPool(true, enclosingInstance); + vectorsPool = new ByteBlockPool(false, enclosingInstance); charPool = new CharBlockPool(enclosingInstance); } private DocumentsWriter enclosingInstance; @@ -878,8 +884,9 @@ ///

Initializes shared state for this new document internal void Init(Document doc, int docID) { - - System.Diagnostics.Debug.Assert(!isIdle); + + System.Diagnostics.Debug.Assert(!isIdle); + System.Diagnostics.Debug.Assert(Enclosing_Instance.writer.TestPoint("DocumentsWriter.ThreadState.init start")); this.docID = docID; docBoost = doc.GetBoost(); @@ -1799,10 +1806,9 @@ try { offsetEnd = offset - 1; - Token token; for (; ; ) { - token = stream.Next(localToken); + Token token = stream.Next(localToken); if (token == null) break; position += (token.GetPositionIncrement() - 1); @@ -2774,19 +2780,6 @@ if (segment == null) segment = writer.NewSegmentName(); - numDocsInRAM++; - - // We must at this point commit to flushing to ensure we - // always get N docs when we flush by doc count, even if - // > 1 thread is adding documents: - if (!flushPending && maxBufferedDocs != IndexWriter.DISABLE_AUTO_FLUSH && numDocsInRAM >= maxBufferedDocs) - { - flushPending = true; - state.doFlushAfter = true; - } - else - state.doFlushAfter = false; - state.isIdle = false; try @@ -2798,12 +2791,22 @@ if (delTerm != null) { AddDeleteTerm(delTerm, state.docID); - if (!state.doFlushAfter) - state.doFlushAfter = TimeToFlushDeletes(); + state.doFlushAfter = TimeToFlushDeletes(); } - // Only increment nextDocID on successful init + // Only increment nextDocID and numDocsInRAM on successful init nextDocID++; - success = true; + numDocsInRAM++; + + // We must at this point commit to flushing to ensure we + // always get N docs when we flush by doc count, even if + // > 1 thread is adding documents: + if (!flushPending && maxBufferedDocs != IndexWriter.DISABLE_AUTO_FLUSH && numDocsInRAM >= maxBufferedDocs) + { + flushPending = true; + state.doFlushAfter = true; + } + + success = true; } finally { @@ -2870,6 +2873,15 @@ { lock (this) { + // If this thread state had decided to flush, we + // must clear is so another thread can flush + if (state.doFlushAfter) + { + state.doFlushAfter = false; + flushPending = false; + System.Threading.Monitor.PulseAll(this); + } + // Immediately mark this document as deleted // since likely it was partially added. This // keeps indexing as "all or none" (atomic) when @@ -3434,10 +3446,14 @@ * hit a non-zero byte. 
*/ sealed internal class ByteBlockPool { - public ByteBlockPool(DocumentsWriter enclosingInstance) + private bool trackAllocations; + + public ByteBlockPool(bool trackAllocations, DocumentsWriter enclosingInstance) { + trackAllocations = trackAllocations; InitBlock(enclosingInstance); } + private void InitBlock(DocumentsWriter enclosingInstance) { this.enclosingInstance = enclosingInstance; @@ -3494,7 +3510,7 @@ Array.Copy(buffers, 0, newBuffers, 0, buffers.GetLength(0)); buffers = newBuffers; } - buffer = buffers[1 + bufferUpto] = Enclosing_Instance.GetByteBlock(); + buffer = buffers[1 + bufferUpto] = Enclosing_Instance.GetByteBlock(trackAllocations); bufferUpto++; byteUpto = 0; @@ -3688,7 +3704,7 @@ private System.Collections.ArrayList freeByteBlocks = new System.Collections.ArrayList(); /* Allocate another byte[] from the shared pool */ - internal byte[] GetByteBlock() + internal byte[] GetByteBlock(bool trackAllocations) { lock (this) { @@ -3707,7 +3723,8 @@ freeByteBlocks.RemoveAt(size - 1); b = (byte[]) tempObject; } - numBytesUsed += BYTE_BLOCK_SIZE; + if (trackAllocations) + numBytesUsed += BYTE_BLOCK_SIZE; return b; } } Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexModifier.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/IndexModifier.cs?rev=756927&r1=756926&r2=756927&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexModifier.cs (original) +++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexModifier.cs Sat Mar 21 12:51:41 2009 @@ -380,7 +380,11 @@ } - /// Returns the number of documents currently in this index. + /// + /// Returns the number of documents currently in this index. If the writer is currently open, + /// this returns IndexWriter.DocCount(), else IndexReader.NumDocs(). But, note that + /// IndexWriter.DocCount() does not take deltions into account, unlike IndexReader.NumDocs(). 
+ /// /// /// /// @@ -650,7 +654,7 @@ indexWriter.Close(); indexWriter = null; } - else + else if (indexReader != null) { indexReader.Close(); indexReader = null; Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexWriter.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/IndexWriter.cs?rev=756927&r1=756926&r2=756927&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexWriter.cs (original) +++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexWriter.cs Sat Mar 21 12:51:41 2009 @@ -237,7 +237,8 @@ private static System.Object MESSAGE_ID_LOCK = new System.Object(); private static int MESSAGE_ID = 0; private int messageID = - 1; - + volatile private bool hitOOM; + private Directory directory; // where this index resides private Analyzer analyzer; // how to analyze text @@ -1236,6 +1237,13 @@ public virtual void Close(bool waitForMerges) { bool doClose; + + // If any methods have hit OutOfMemoryError, then abort + // on close, in case theinternal state of IndexWriter + // or DocumentsWriter is corrupt + if (hitOOM) + Abort(); + lock (this) { // Ensure that only one thread actually gets to do the closing: @@ -1285,6 +1293,11 @@ // Only allow a new merge to be triggered if we are // going to wait for merges: Flush(waitForMerges, true); + + if (waitForMerges) + // Give merge scheduler last chance to run, in case + // any pending merges are waiting + mergeScheduler.Merge(this); mergePolicy.Close(); @@ -1338,12 +1351,21 @@ } closed = true; } + catch (OutOfMemoryException oom) + { + hitOOM = true; + throw oom; + } finally { lock (this) { - if (!closed) - closing = false; + if (!closed) + { + closing = false; + if (infoStream != null) + Message("hit exception while closing"); + } System.Threading.Monitor.PulseAll(this); } } @@ -1557,34 +1579,42 @@ EnsureOpen(); bool doFlush = false; bool success = false; - try - { - doFlush = docWriter.AddDocument(doc, analyzer); - success = true; - } - finally - { - if (!success) - { - - if (infoStream != null) - Message("hit exception adding document"); - - lock (this) - { - // If docWriter has some aborted files that were - // never incref'd, then we clean them up here - if (docWriter != null) - { - System.Collections.IList files = docWriter.AbortedFiles(); - if (files != null) - deleter.DeleteNewFiles(files); - } - } - } - } - if (doFlush) - Flush(true, false); + try + { + try + { + doFlush = docWriter.AddDocument(doc, analyzer); + success = true; + } + finally + { + if (!success) + { + + if (infoStream != null) + Message("hit exception adding document"); + + lock (this) + { + // If docWriter has some aborted files that were + // never incref'd, then we clean them up here + if (docWriter != null) + { + System.Collections.IList files = docWriter.AbortedFiles(); + if (files != null) + deleter.DeleteNewFiles(files); + } + } + } + } + if (doFlush) + Flush(true, false); + } + catch (OutOfMemoryException oom) + { + hitOOM = true; + throw oom; + } } /// Deletes the document(s) containing term. 
@@ -1595,9 +1625,17 @@ public virtual void DeleteDocuments(Term term) { EnsureOpen(); - bool doFlush = docWriter.BufferDeleteTerm(term); - if (doFlush) - Flush(true, false); + try + { + bool doFlush = docWriter.BufferDeleteTerm(term); + if (doFlush) + Flush(true, false); + } + catch (OutOfMemoryException oom) + { + hitOOM = true; + throw oom; + } } /// Deletes the document(s) containing any of the @@ -1610,11 +1648,19 @@ /// IOException if there is a low-level IO error public virtual void DeleteDocuments(Term[] terms) { - EnsureOpen(); - bool doFlush = docWriter.BufferDeleteTerms(terms); - if (doFlush) - Flush(true, false); - } + EnsureOpen(); + try + { + bool doFlush = docWriter.BufferDeleteTerms(terms); + if (doFlush) + Flush(true, false); + } + catch (OutOfMemoryException oom) + { + hitOOM = true; + throw oom; + } + } /// Updates a document by first deleting the document(s) /// containing term and then adding the new @@ -1653,34 +1699,43 @@ public virtual void UpdateDocument(Term term, Document doc, Analyzer analyzer) { EnsureOpen(); - bool doFlush = false; - bool success = false; - try - { - doFlush = docWriter.UpdateDocument(term, doc, analyzer); - success = true; - } - finally - { - if (!success) - { - - if (infoStream != null) - Message("hit exception updating document"); - - lock (this) - { - // If docWriter has some aborted files that were - // never incref'd, then we clean them up here - System.Collections.IList files = docWriter.AbortedFiles(); - if (files != null) - deleter.DeleteNewFiles(files); - } - } - } - if (doFlush) - Flush(true, false); - } + try + { + bool doFlush = false; + bool success = false; + try + { + doFlush = docWriter.UpdateDocument(term, doc, analyzer); + success = true; + } + finally + { + if (!success) + { + + if (infoStream != null) + Message("hit exception updating document"); + + lock (this) + { + // If docWriter has some aborted files that were + // never incref'd, then we clean them up here + System.Collections.IList files = docWriter.AbortedFiles(); + if (files != null) + deleter.DeleteNewFiles(files); + } + } + } + if (doFlush) + Flush(true, false); + + } + catch (OutOfMemoryException oom) + { + hitOOM = true; + throw oom; + } + } // for test purpose public /*internal*/ int GetSegmentCount() @@ -1838,7 +1893,7 @@ /// public virtual void Optimize(bool doWait) { - Optimize(1, true); + Optimize(1, doWait); } /// Just like {@link #Optimize(int)}, except you can @@ -2053,30 +2108,32 @@ */ private void StartTransaction() { - - if (infoStream != null) - Message("now start transaction"); - - System.Diagnostics.Debug.Assert(docWriter.GetNumBufferedDeleteTerms() == 0, "calling startTransaction with buffered delete terms not supported"); - System.Diagnostics.Debug.Assert(docWriter.GetNumDocsInRAM() == 0, "calling startTransaction with buffered documents not supported"); - - localRollbackSegmentInfos = (SegmentInfos) segmentInfos.Clone(); - localAutoCommit = autoCommit; - - if (localAutoCommit) - { - - if (infoStream != null) - Message("flush at startTransaction"); - - Flush(); - // Turn off auto-commit during our local transaction: - autoCommit = false; - } - // We must "protect" our files at this point from - // deletion in case we need to rollback: - else - deleter.IncRef(segmentInfos, false); + lock (this) + { + if (infoStream != null) + Message("now start transaction"); + + System.Diagnostics.Debug.Assert(docWriter.GetNumBufferedDeleteTerms() == 0, "calling startTransaction with buffered delete terms not supported"); + 
System.Diagnostics.Debug.Assert(docWriter.GetNumDocsInRAM() == 0, "calling startTransaction with buffered documents not supported"); + + localRollbackSegmentInfos = (SegmentInfos)segmentInfos.Clone(); + localAutoCommit = autoCommit; + + if (localAutoCommit) + { + + if (infoStream != null) + Message("flush at startTransaction"); + + Flush(); + // Turn off auto-commit during our local transaction: + autoCommit = false; + } + // We must "protect" our files at this point from + // deletion in case we need to rollback: + else + deleter.IncRef(segmentInfos, false); + } } /* @@ -2085,32 +2142,34 @@ */ private void RollbackTransaction() { - - if (infoStream != null) - Message("now rollback transaction"); - - // First restore autoCommit in case we hit an exception below: - autoCommit = localAutoCommit; - - // Keep the same segmentInfos instance but replace all - // of its SegmentInfo instances. This is so the next - // attempt to commit using this instance of IndexWriter - // will always write to a new generation ("write once"). - segmentInfos.Clear(); - segmentInfos.AddRange(localRollbackSegmentInfos); - localRollbackSegmentInfos = null; - - // Ask deleter to locate unreferenced files we had - // created & remove them: - deleter.Checkpoint(segmentInfos, false); - - if (!autoCommit) - // Remove the incRef we did in startTransaction: - deleter.DecRef(segmentInfos); - - deleter.Refresh(); - FinishMerges(false); - stopMerges = false; + lock (this) + { + if (infoStream != null) + Message("now rollback transaction"); + + // First restore autoCommit in case we hit an exception below: + autoCommit = localAutoCommit; + + // Keep the same segmentInfos instance but replace all + // of its SegmentInfo instances. This is so the next + // attempt to commit using this instance of IndexWriter + // will always write to a new generation ("write once"). + segmentInfos.Clear(); + segmentInfos.AddRange(localRollbackSegmentInfos); + localRollbackSegmentInfos = null; + + // Ask deleter to locate unreferenced files we had + // created & remove them: + deleter.Checkpoint(segmentInfos, false); + + if (!autoCommit) + // Remove the incRef we did in startTransaction: + deleter.DecRef(segmentInfos); + + deleter.Refresh(); + FinishMerges(false); + stopMerges = false; + } } /* @@ -2120,38 +2179,40 @@ */ private void CommitTransaction() { - - if (infoStream != null) - Message("now commit transaction"); - - // First restore autoCommit in case we hit an exception below: - autoCommit = localAutoCommit; - - bool success = false; - try - { - Checkpoint(); - success = true; - } - finally - { - if (!success) - { - if (infoStream != null) - Message("hit exception committing transaction"); - - RollbackTransaction(); - } - } - - if (!autoCommit) - // Remove the incRef we did in startTransaction. - deleter.DecRef(localRollbackSegmentInfos); - - localRollbackSegmentInfos = null; - - // Give deleter a chance to remove files now: - deleter.Checkpoint(segmentInfos, autoCommit); + lock (this) + { + if (infoStream != null) + Message("now commit transaction"); + + // First restore autoCommit in case we hit an exception below: + autoCommit = localAutoCommit; + + bool success = false; + try + { + Checkpoint(); + success = true; + } + finally + { + if (!success) + { + if (infoStream != null) + Message("hit exception committing transaction"); + + RollbackTransaction(); + } + } + + if (!autoCommit) + // Remove the incRef we did in startTransaction. 
+ deleter.DecRef(localRollbackSegmentInfos); + + localRollbackSegmentInfos = null; + + // Give deleter a chance to remove files now: + deleter.Checkpoint(segmentInfos, autoCommit); + } } /// Close the IndexWriter without committing @@ -2331,7 +2392,11 @@ /// each input Directory, so it is up to the caller to /// enforce this. /// - ///

After this completes, the index is optimized. + ///

NOTE: while this is running, any attempts to + /// add or delete documents (with another thread) will be + /// paused until this method completes. + /// + ///

After this completes, the index is optimized. /// ///

This method is transactional in how Exceptions are /// handled: it does not commit a new segments_N file until @@ -2371,47 +2436,64 @@ /// IOException if there is a low-level IO error public virtual void AddIndexes(Directory[] dirs) { - lock (this) - { - - EnsureOpen(); - if (infoStream != null) - Message("flush at addIndexes"); - Flush(); - - bool success = false; - - StartTransaction(); - - try - { - for (int i = 0; i < dirs.Length; i++) - { - SegmentInfos sis = new SegmentInfos(); // read infos from dir - sis.Read(dirs[i]); - for (int j = 0; j < sis.Count; j++) - { - segmentInfos.Add(sis.Info(j)); // add each info - } - } - - Optimize(); - - success = true; - } - finally - { - if (success) - { - CommitTransaction(); - } - else - { - RollbackTransaction(); - } - } - } - } + + EnsureOpen(); + + // Do not allow add docs or deletes while we are running: + docWriter.PauseAllThreads(); + + try + { + if (infoStream != null) + Message("flush at addIndexes"); + Flush(); + + bool success = false; + + StartTransaction(); + + try + { + lock (this) + { + for (int i = 0; i < dirs.Length; i++) + { + SegmentInfos sis = new SegmentInfos(); // read infos from dir + sis.Read(dirs[i]); + for (int j = 0; j < sis.Count; j++) + { + SegmentInfo info = sis.Info(j); + segmentInfos.Add(sis.Info(j)); // add each info + } + } + } + + Optimize(); + + success = true; + } + finally + { + if (success) + { + CommitTransaction(); + } + else + { + RollbackTransaction(); + } + } + } + catch (OutOfMemoryException oom) + { + hitOOM = true; + throw oom; + } + finally + { + docWriter.ResumeAllThreads(); + } + } private void ResetMergeExceptions() { @@ -2434,7 +2516,11 @@ /// each input Directory, so it is up to the caller to /// enforce this. /// - ///

+ ///

NOTE: while this is running, any attempts to + /// add or delete documents (with another thread) will be + /// paused until this method completes. + /// + ///

/// This requires this index not be among those to be added, and the /// upper bound* of those segment doc counts not exceed maxMergeDocs. /// @@ -2447,102 +2533,155 @@ /// IOException if there is a low-level IO error public virtual void AddIndexesNoOptimize(Directory[] dirs) { - lock (this) - { - - EnsureOpen(); - if (infoStream != null) - Message("flush at addIndexesNoOptimize"); - Flush(); - - bool success = false; - - StartTransaction(); - - try - { - - for (int i = 0; i < dirs.Length; i++) - { - if (directory == dirs[i]) - { - // cannot add this index: segments may be deleted in merge before added - throw new System.ArgumentException("Cannot add this index to itself"); - } - - SegmentInfos sis = new SegmentInfos(); // read infos from dir - sis.Read(dirs[i]); - for (int j = 0; j < sis.Count; j++) - { - SegmentInfo info = sis.Info(j); - segmentInfos.Add(info); // add each info - } - } - - MaybeMerge(); - - // If after merging there remain segments in the index - // that are in a different directory, just copy these - // over into our index. This is necessary (before - // finishing the transaction) to avoid leaving the - // index in an unusable (inconsistent) state. - CopyExternalSegments(); - - success = true; - } - finally - { - if (success) - { - CommitTransaction(); - } - else - { - RollbackTransaction(); - } - } - } - } + + EnsureOpen(); + + // Do not allow add socs or deletes while we are running: + docWriter.PauseAllThreads(); + + try + { + if (infoStream != null) + Message("flush at addIndexesNoOptimize"); + Flush(); + + bool success = false; + + StartTransaction(); + + try + { + + lock (this) + { + for (int i = 0; i < dirs.Length; i++) + { + if (directory == dirs[i]) + { + // cannot add this index: segments may be deleted in merge before added + throw new System.ArgumentException("Cannot add this index to itself"); + } + + SegmentInfos sis = new SegmentInfos(); // read infos from dir + sis.Read(dirs[i]); + for (int j = 0; j < sis.Count; j++) + { + SegmentInfo info = sis.Info(j); + segmentInfos.Add(info); // add each info + } + } + } + + MaybeMerge(); + + // If after merging there remain segments in the index + // that are in a different directory, just copy these + // over into our index. This is necessary (before + // finishing the transaction) to avoid leaving the + // index in an unusable (inconsistent) state. + CopyExternalSegments(); + + success = true; + } + finally + { + if (success) + { + CommitTransaction(); + } + else + { + RollbackTransaction(); + } + } + } + catch (OutOfMemoryException oom) + { + hitOOM = true; + throw oom; + } + finally + { + docWriter.ResumeAllThreads(); + } + } /* If any of our segments are using a directory != ours * then copy them over. Currently this is only used by * addIndexesNoOptimize(). */ private void CopyExternalSegments() { - lock (this) - { - int numSegments = segmentInfos.Count; - for (int i = 0; i < numSegments; i++) - { - SegmentInfo info = segmentInfos.Info(i); - if (info.dir != directory) - { - MergePolicy.OneMerge merge = new MergePolicy.OneMerge(segmentInfos.Range(i, 1 + i), info.GetUseCompoundFile()); - if (RegisterMerge(merge)) - { - pendingMerges.Remove(merge); - runningMerges.Add(merge, merge); - Merge(merge); - } - // This means there is a bug in the - // MergeScheduler. 
MergeSchedulers in general are - // not allowed to run a merge involving segments - // external to this IndexWriter's directory in the - // background because this would put the index - // into an inconsistent state (where segmentInfos - // has been written with such external segments - // that an IndexReader would fail to load). - else - throw new MergePolicy.MergeException("segment \"" + info.name + " exists in external directory yet the MergeScheduler executed the merge in a separate thread"); - } - } - } - } + bool any = false; + + while (true) + { + SegmentInfo info = null; + MergePolicy.OneMerge merge = null; + + lock (this) + { + int numSegments = segmentInfos.Count; + for (int i = 0; i < numSegments; i++) + { + info = segmentInfos.Info(i); + if (info.dir != directory) + { + merge = new MergePolicy.OneMerge(segmentInfos.Range(i, 1 + i), info.GetUseCompoundFile()); + break; + } + } + } + if (merge != null) + { + if (RegisterMerge(merge)) + { + pendingMerges.Remove(merge); + runningMerges.Add(merge, merge); + any = true; + Merge(merge); + } + else + { + // This means there is a bug in the + // MergeScheduler. MergeSchedulers in general are + // not allowed to run a merge involving segments + // external to this IndexWriter's directory in the + // background because this would put the index + // into an inconsistent state (where segmentInfos + // has been written with such external segments + // that an IndexReader would fail to load). + throw new MergePolicy.MergeException("segment \"" + info.name + " exists in external directory yet the MergeScheduler executed the merge in a separate thread"); + } + } + else + { + // No more external segments + break; + } + } + + if (any) + // Sometimes, on copying an external segment over, + // more merges may become necessary: + mergeScheduler.Merge(this); + } ///

Merges the provided indexes into this index. ///

After this completes, the index is optimized.

///

The provided IndexReaders are not closed.

- ///

See {@link #AddIndexes(Directory[])} for + /// + ///

NOTE: the index in each Directory must not be + /// changed (opened by a writer) while this method is + /// running. This method does not acquire a write lock in + /// each input Directory, so it is up to the caller to + /// enforce this.

+ /// + ///

NOTE: while this is running, any attempts to + /// add or delete documents (with another thread) will be + /// paused until this method completes.

+ /// + ///

See {@link #AddIndexes(Directory[])} for /// details on transactional semantics, temporary free /// space required in the Directory, and non-CFS segments /// on an Exception.

@@ -2551,108 +2690,130 @@ /// IOException if there is a low-level IO error public virtual void AddIndexes(IndexReader[] readers) { - lock (this) - { - - EnsureOpen(); - Optimize(); // start with zero or 1 seg - - System.String mergedName = NewSegmentName(); - SegmentMerger merger = new SegmentMerger(this, mergedName, null); - - SegmentInfo info; - - IndexReader sReader = null; - try - { - if (segmentInfos.Count == 1) - { - // add existing index, if any - sReader = SegmentReader.Get(segmentInfos.Info(0)); - merger.Add(sReader); - } - - for (int i = 0; i < readers.Length; i++) - // add new indexes - merger.Add(readers[i]); - - bool success = false; - - StartTransaction(); - - try - { - int docCount = merger.Merge(); // merge 'em - - if (sReader != null) - { - sReader.Close(); - sReader = null; - } - - segmentInfos.RemoveRange(0, segmentInfos.Count); // pop old infos & add new - info = new SegmentInfo(mergedName, docCount, directory, false, true, - 1, null, false); - segmentInfos.Add(info); - - success = true; - } - finally - { - if (!success) - { - if (infoStream != null) - Message("hit exception in addIndexes during merge"); - - RollbackTransaction(); - } - else - { - CommitTransaction(); - } - } - } - finally - { - if (sReader != null) - { - sReader.Close(); - } - } - - if (mergePolicy is LogMergePolicy && GetUseCompoundFile()) - { - - bool success = false; - - StartTransaction(); - - try - { - merger.CreateCompoundFile(mergedName + ".cfs"); - info.SetUseCompoundFile(true); - } - finally - { - if (!success) - { - if (infoStream != null) - Message("hit exception building compound file in addIndexes during merge"); - - RollbackTransaction(); - } - else - { - CommitTransaction(); - } - } - } - } - } + + EnsureOpen(); + + // Do not allow add docs or deletes while we are running: + docWriter.PauseAllThreads(); + + try + { + Optimize(); // start with zero or 1 seg + + System.String mergedName = NewSegmentName(); + SegmentMerger merger = new SegmentMerger(this, mergedName, null); + + SegmentInfo info; + + IndexReader sReader = null; + try + { + lock (this) + { + if (segmentInfos.Count == 1) + { + // add existing index, if any + sReader = SegmentReader.Get(segmentInfos.Info(0)); + merger.Add(sReader); + } + } + + + for (int i = 0; i < readers.Length; i++) + // add new indexes + merger.Add(readers[i]); + + bool success = false; + + StartTransaction(); + + try + { + int docCount = merger.Merge(); // merge 'em + + if (sReader != null) + { + sReader.Close(); + sReader = null; + } + + lock (this) + { + segmentInfos.RemoveRange(0, segmentInfos.Count); // pop old infos & add new + info = new SegmentInfo(mergedName, docCount, directory, false, true, -1, null, false); + segmentInfos.Add(info); + } + success = true; + } + finally + { + if (!success) + { + if (infoStream != null) + Message("hit exception in addIndexes during merge"); + + RollbackTransaction(); + } + else + { + CommitTransaction(); + } + } + } + finally + { + if (sReader != null) + { + sReader.Close(); + } + } + + if (mergePolicy is LogMergePolicy && GetUseCompoundFile()) + { + + bool success = false; + + StartTransaction(); + + try + { + merger.CreateCompoundFile(mergedName + ".cfs"); + lock (this) + { + info.SetUseCompoundFile(true); + } + } + finally + { + if (!success) + { + if (infoStream != null) + Message("hit exception building compound file in addIndexes during merge"); + + RollbackTransaction(); + } + else + { + CommitTransaction(); + } + } + } + } + catch (OutOfMemoryException oom) + { + hitOOM = true; + throw oom; + } + finally + 
{ + docWriter.ResumeAllThreads(); + } + } // This is called after pending added and deleted // documents have been flushed to the Directory but before // the change is committed (new segments_N file written). - internal virtual void DoAfterFlush() + protected virtual void DoAfterFlush() { } @@ -2699,189 +2860,195 @@ docWriter.ResumeAllThreads(); return false; } - - try - { - - SegmentInfo newSegment = null; - - int numDocs = docWriter.GetNumDocsInRAM(); - - // Always flush docs if there are any - bool flushDocs = numDocs > 0; - - // With autoCommit=true we always must flush the doc - // stores when we flush - flushDocStores |= autoCommit; - System.String docStoreSegment = docWriter.GetDocStoreSegment(); - if (docStoreSegment == null) - flushDocStores = false; - - // Always flush deletes if there are any delete terms. - // TODO: when autoCommit=false we don't have to flush - // deletes with every flushed segment; we can save - // CPU/IO by buffering longer & flushing deletes only - // when they are full or writer is being closed. We - // have to fix the "applyDeletesSelectively" logic to - // apply to more than just the last flushed segment - bool flushDeletes = docWriter.HasDeletes(); - - if (infoStream != null) - { - Message(" flush: segment=" + docWriter.GetSegment() + " docStoreSegment=" + docWriter.GetDocStoreSegment() + " docStoreOffset=" + docWriter.GetDocStoreOffset() + " flushDocs=" + flushDocs + " flushDeletes=" + flushDeletes + " flushDocStores=" + flushDocStores + " numDocs=" + numDocs + " numBufDelTerms=" + docWriter.GetNumBufferedDeleteTerms()); - Message(" index before flush " + SegString()); - } - - int docStoreOffset = docWriter.GetDocStoreOffset(); - - // docStoreOffset should only be non-zero when - // autoCommit == false - System.Diagnostics.Debug.Assert(!autoCommit || 0 == docStoreOffset); - - bool docStoreIsCompoundFile = false; - - // Check if the doc stores must be separately flushed - // because other segments, besides the one we are about - // to flush, reference it - if (flushDocStores && (!flushDocs || !docWriter.GetSegment().Equals(docWriter.GetDocStoreSegment()))) - { - // We must separately flush the doc store - if (infoStream != null) - Message(" flush shared docStore segment " + docStoreSegment); - - docStoreIsCompoundFile = FlushDocStores(); - flushDocStores = false; - } - - System.String segment = docWriter.GetSegment(); - - // If we are flushing docs, segment must not be null: - System.Diagnostics.Debug.Assert(segment != null || !flushDocs); - - if (flushDocs || flushDeletes) - { - - SegmentInfos rollback = null; - - if (flushDeletes) - rollback = (SegmentInfos) segmentInfos.Clone(); - - bool success = false; - - try - { - if (flushDocs) - { - - if (0 == docStoreOffset && flushDocStores) - { - // This means we are flushing private doc stores - // with this segment, so it will not be shared - // with other segments - System.Diagnostics.Debug.Assert(docStoreSegment != null); - System.Diagnostics.Debug.Assert(docStoreSegment.Equals(segment)); - docStoreOffset = - 1; - docStoreIsCompoundFile = false; - docStoreSegment = null; - } - - int flushedDocCount = docWriter.Flush(flushDocStores); - - newSegment = new SegmentInfo(segment, flushedDocCount, directory, false, true, docStoreOffset, docStoreSegment, docStoreIsCompoundFile); - segmentInfos.Add(newSegment); - } - - if (flushDeletes) - { - // we should be able to change this so we can - // buffer deletes longer and then flush them to - // multiple flushed segments, when - // autoCommit=false - 
ApplyDeletes(flushDocs); - DoAfterFlush(); - } - - Checkpoint(); - success = true; - } - finally - { - if (!success) - { - - if (infoStream != null) - Message("hit exception flushing segment " + segment); - - if (flushDeletes) - { - - // Carefully check if any partial .del files - // should be removed: - int size = rollback.Count; - for (int i = 0; i < size; i++) - { - System.String newDelFileName = segmentInfos.Info(i).GetDelFileName(); - System.String delFileName = rollback.Info(i).GetDelFileName(); - if (newDelFileName != null && !newDelFileName.Equals(delFileName)) - deleter.DeleteFile(newDelFileName); - } - - // Fully replace the segmentInfos since flushed - // deletes could have changed any of the - // SegmentInfo instances: - segmentInfos.Clear(); - segmentInfos.AddRange(rollback); - } - else - { - // Remove segment we added, if any: - if (newSegment != null && segmentInfos.Count > 0 && segmentInfos.Info(segmentInfos.Count - 1) == newSegment) - segmentInfos.RemoveAt(segmentInfos.Count - 1); - } - if (flushDocs) - docWriter.Abort(null); - DeletePartialSegmentsFile(); - deleter.Checkpoint(segmentInfos, false); - - if (segment != null) - deleter.Refresh(segment); - } - } - - deleter.Checkpoint(segmentInfos, autoCommit); - - if (flushDocs && mergePolicy.UseCompoundFile(segmentInfos, newSegment)) - { - success = false; - try - { - docWriter.CreateCompoundFile(segment); - newSegment.SetUseCompoundFile(true); - Checkpoint(); - success = true; - } - finally - { - if (!success) - { - if (infoStream != null) - Message("hit exception creating compound file for newly flushed segment " + segment); - newSegment.SetUseCompoundFile(false); - deleter.DeleteFile(segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION); - DeletePartialSegmentsFile(); - } - } - - deleter.Checkpoint(segmentInfos, autoCommit); - } - - return true; - } - else - { - return false; - } - } + + try + { + + SegmentInfo newSegment = null; + + int numDocs = docWriter.GetNumDocsInRAM(); + + // Always flush docs if there are any + bool flushDocs = numDocs > 0; + + // With autoCommit=true we always must flush the doc + // stores when we flush + flushDocStores |= autoCommit; + System.String docStoreSegment = docWriter.GetDocStoreSegment(); + if (docStoreSegment == null) + flushDocStores = false; + + // Always flush deletes if there are any delete terms. + // TODO: when autoCommit=false we don't have to flush + // deletes with every flushed segment; we can save + // CPU/IO by buffering longer & flushing deletes only + // when they are full or writer is being closed. 
We + // have to fix the "applyDeletesSelectively" logic to + // apply to more than just the last flushed segment + bool flushDeletes = docWriter.HasDeletes(); + + if (infoStream != null) + { + Message(" flush: segment=" + docWriter.GetSegment() + " docStoreSegment=" + docWriter.GetDocStoreSegment() + " docStoreOffset=" + docWriter.GetDocStoreOffset() + " flushDocs=" + flushDocs + " flushDeletes=" + flushDeletes + " flushDocStores=" + flushDocStores + " numDocs=" + numDocs + " numBufDelTerms=" + docWriter.GetNumBufferedDeleteTerms()); + Message(" index before flush " + SegString()); + } + + int docStoreOffset = docWriter.GetDocStoreOffset(); + + // docStoreOffset should only be non-zero when + // autoCommit == false + System.Diagnostics.Debug.Assert(!autoCommit || 0 == docStoreOffset); + + bool docStoreIsCompoundFile = false; + + // Check if the doc stores must be separately flushed + // because other segments, besides the one we are about + // to flush, reference it + if (flushDocStores && (!flushDocs || !docWriter.GetSegment().Equals(docWriter.GetDocStoreSegment()))) + { + // We must separately flush the doc store + if (infoStream != null) + Message(" flush shared docStore segment " + docStoreSegment); + + docStoreIsCompoundFile = FlushDocStores(); + flushDocStores = false; + } + + System.String segment = docWriter.GetSegment(); + + // If we are flushing docs, segment must not be null: + System.Diagnostics.Debug.Assert(segment != null || !flushDocs); + + if (flushDocs || flushDeletes) + { + + SegmentInfos rollback = null; + + if (flushDeletes) + rollback = (SegmentInfos)segmentInfos.Clone(); + + bool success = false; + + try + { + if (flushDocs) + { + + if (0 == docStoreOffset && flushDocStores) + { + // This means we are flushing private doc stores + // with this segment, so it will not be shared + // with other segments + System.Diagnostics.Debug.Assert(docStoreSegment != null); + System.Diagnostics.Debug.Assert(docStoreSegment.Equals(segment)); + docStoreOffset = -1; + docStoreIsCompoundFile = false; + docStoreSegment = null; + } + + int flushedDocCount = docWriter.Flush(flushDocStores); + + newSegment = new SegmentInfo(segment, flushedDocCount, directory, false, true, docStoreOffset, docStoreSegment, docStoreIsCompoundFile); + segmentInfos.Add(newSegment); + } + + if (flushDeletes) + { + // we should be able to change this so we can + // buffer deletes longer and then flush them to + // multiple flushed segments, when + // autoCommit=false + ApplyDeletes(flushDocs); + } + + DoAfterFlush(); + + Checkpoint(); + success = true; + } + finally + { + if (!success) + { + + if (infoStream != null) + Message("hit exception flushing segment " + segment); + + if (flushDeletes) + { + + // Carefully check if any partial .del files + // should be removed: + int size = rollback.Count; + for (int i = 0; i < size; i++) + { + System.String newDelFileName = segmentInfos.Info(i).GetDelFileName(); + System.String delFileName = rollback.Info(i).GetDelFileName(); + if (newDelFileName != null && !newDelFileName.Equals(delFileName)) + deleter.DeleteFile(newDelFileName); + } + + // Fully replace the segmentInfos since flushed + // deletes could have changed any of the + // SegmentInfo instances: + segmentInfos.Clear(); + segmentInfos.AddRange(rollback); + } + else + { + // Remove segment we added, if any: + if (newSegment != null && segmentInfos.Count > 0 && segmentInfos.Info(segmentInfos.Count - 1) == newSegment) + segmentInfos.RemoveAt(segmentInfos.Count - 1); + } + if (flushDocs) + docWriter.Abort(null); + 
DeletePartialSegmentsFile(); + deleter.Checkpoint(segmentInfos, false); + + if (segment != null) + deleter.Refresh(segment); + } + } + + deleter.Checkpoint(segmentInfos, autoCommit); + + if (flushDocs && mergePolicy.UseCompoundFile(segmentInfos, newSegment)) + { + success = false; + try + { + docWriter.CreateCompoundFile(segment); + newSegment.SetUseCompoundFile(true); + Checkpoint(); + success = true; + } + finally + { + if (!success) + { + if (infoStream != null) + Message("hit exception creating compound file for newly flushed segment " + segment); + newSegment.SetUseCompoundFile(false); + deleter.DeleteFile(segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION); + DeletePartialSegmentsFile(); + } + } + + deleter.Checkpoint(segmentInfos, autoCommit); + } + + return true; + } + else + { + return false; + } + } + catch (OutOfMemoryException oom) + { + hitOOM = true; + throw oom; + } finally { docWriter.ClearFlushPending(); @@ -2946,6 +3113,12 @@ { System.Diagnostics.Debug.Assert(merge.registerDone); + + if (hitOOM) + return false; + + if (infoStream != null) + Message("CommitMerge: " + merge.SegString(directory)); // If merge was explicitly aborted, or, if abort() or // rollbackTransaction() had been called since our merge @@ -3155,61 +3328,74 @@ System.Diagnostics.Debug.Assert(!merge.optimize || merge.maxNumSegmentsOptimize > 0); bool success = false; - - try - { - - try - { - if (merge.info == null) - MergeInit(merge); - - if (infoStream != null) - Message("now merge\n merge=" + merge.SegString(directory) + "\n index=" + SegString()); - - MergeMiddle(merge); - success = true; - } - catch (MergePolicy.MergeAbortedException e) - { - merge.SetException(e); - AddMergeException(merge); - // We can ignore this exception, unless the merge - // involves segments from external directories, in - // which case we must throw it so, for example, the - // rollbackTransaction code in addIndexes* is - // executed. - if (merge.isExternal) - throw e; - } - } - finally - { - lock (this) - { - try - { - if (!success && infoStream != null) - Message("hit exception during merge"); - - MergeFinish(merge); - - // This merge (and, generally, any change to the - // segments) may now enable new merges, so we call - // merge policy & update pending merges. - if (success && !merge.IsAborted() && !closed && !closing) - UpdatePendingMerges(merge.maxNumSegmentsOptimize, merge.optimize); - } - finally - { - runningMerges.Remove(merge); - // Optimize may be waiting on the final optimize - // merge to finish; and finishMerges() may be - // waiting for all merges to finish: - System.Threading.Monitor.PulseAll(this); - } - } - } + + try + { + try + { + try + { + if (merge.info == null) + MergeInit(merge); + + if (infoStream != null) + Message("now merge\n merge=" + merge.SegString(directory) + "\n index=" + SegString()); + + MergeMiddle(merge); + success = true; + } + catch (MergePolicy.MergeAbortedException e) + { + merge.SetException(e); + AddMergeException(merge); + // We can ignore this exception, unless the merge + // involves segments from external directories, in + // which case we must throw it so, for example, the + // rollbackTransaction code in addIndexes* is + // executed. 
+ if (merge.isExternal) + throw e; + } + } + finally + { + lock (this) + { + try + { + MergeFinish(merge); + + if (!success) + { + if (infoStream != null) + Message("hit exception during merge"); + AddMergeException(merge); + if (merge.info != null && !segmentInfos.Contains(merge.info)) + deleter.Refresh(merge.info.name); + } + + // This merge (and, generally, any change to the + // segments) may now enable new merges, so we call + // merge policy & update pending merges. + if (success && !merge.IsAborted() && !closed && !closing) + UpdatePendingMerges(merge.maxNumSegmentsOptimize, merge.optimize); + } + finally + { + runningMerges.Remove(merge); + // Optimize may be waiting on the final optimize + // merge to finish; and finishMerges() may be + // waiting for all merges to finish: + System.Threading.Monitor.PulseAll(this); + } + } + } + } + catch (OutOfMemoryException oom) + { + hitOOM = true; + throw oom; + } } /// Checks whether this merge involves any segments @@ -3265,13 +3451,39 @@ /// Does initial setup for a merge, which is fast but holds /// the synchronized lock on IndexWriter instance. /// - internal void MergeInit(MergePolicy.OneMerge merge) + internal void MergeInit(MergePolicy.OneMerge merge) + { + lock (this) + { + bool success = false; + try + { + _MergeInit(merge); + success = true; + } + finally + { + if (!success) + { + MergeFinish(merge); + runningMerges.Remove(merge); + } + } + } + } + + internal void _MergeInit(MergePolicy.OneMerge merge) { lock (this) { - + System.Diagnostics.Debug.Assert(TestPoint("startMergeInit")); + System.Diagnostics.Debug.Assert(merge.registerDone); - + + if (merge.info != null) + // mergeInit already done + return; + if (merge.IsAborted()) return ; @@ -3809,5 +4021,11 @@ DEFAULT_MAX_MERGE_DOCS = LogDocMergePolicy.DEFAULT_MAX_MERGE_DOCS; MAX_TERM_LENGTH = DocumentsWriter.MAX_TERM_LENGTH; } - } + + // Used only by assert for testing. 
+ virtual protected internal bool TestPoint(string name) + { + return true; + } + } } \ No newline at end of file Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/LogMergePolicy.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/LogMergePolicy.cs?rev=756927&r1=756926&r2=756927&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/LogMergePolicy.cs (original) +++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/LogMergePolicy.cs Sat Mar 21 12:51:41 2009 @@ -308,13 +308,6 @@ SegmentInfo info = infos.Info(i); long size = Size(info); - // Refuse to import a segment that's too large - if (info.docCount > maxMergeDocs && info.dir != directory) - throw new System.ArgumentException("Segment is too large (" + info.docCount + " docs vs max docs " + maxMergeDocs + ")"); - - if (size >= maxMergeSize && info.dir != directory) - throw new System.ArgumentException("Segment is too large (" + size + " vs max size " + maxMergeSize + ")"); - // Floor tiny segments if (size < 1) size = 1; Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentMerger.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentMerger.cs?rev=756927&r1=756926&r2=756927&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentMerger.cs (original) +++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentMerger.cs Sat Mar 21 12:51:41 2009 @@ -395,6 +395,11 @@ { fieldsWriter.Close(); } + + System.Diagnostics.Debug.Assert(docCount*8 == directory.FileLength(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION), + "after MergeFields: fdx size mismatch: " + docCount + " docs vs " + + directory.FileLength(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION) + + " length in bytes of " + segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION); } // If we are skipping the doc stores, that means there // are no deletions in any of these segments, so we @@ -433,6 +438,11 @@ { termVectorsWriter.Close(); } + + System.Diagnostics.Debug.Assert(4 + mergedDocs * 8 == directory.FileLength(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION), + "after MergeVectors: tvx size mismatch: " + mergedDocs + " docs vs " + + directory.FileLength(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION) + + " length in bytes of " + segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION); } private IndexOutput freqOutput = null;
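
[Editor's note, not part of r756927] A pattern that recurs throughout the IndexWriter.cs hunks above is the out-of-memory guard: each mutating entry point (AddDocument, DeleteDocuments, UpdateDocument, AddIndexes*, Flush, Merge) catches OutOfMemoryException, records it in the new volatile hitOOM field, and rethrows; Close then takes the abort path instead of committing once the flag is set, because the writer's buffered state can no longer be trusted. A stripped-down sketch of that shape, where only the member names hitOOM and Abort follow the diff and everything else is simplified:

// Stripped-down sketch of the hitOOM guard added to IndexWriter in this commit.
// Illustrative only; the real class carries far more state and logic.
using System;

public class OomGuardedWriterSketch
{
    private volatile bool hitOOM; // set once any operation runs out of memory

    public void AddDocument(object doc)
    {
        try
        {
            BufferAndMaybeFlush(doc);
        }
        catch (OutOfMemoryException)
        {
            hitOOM = true; // internal buffers may be corrupt; remember that
            throw;         // the caller still sees the original error
        }
    }

    public void Close()
    {
        if (hitOOM)
        {
            Abort();       // discard buffered docs rather than commit suspect state
            return;
        }
        // ... normal flush / commit / release-write-lock path ...
    }

    private void BufferAndMaybeFlush(object doc) { /* buffer the document, flush if thresholds are hit */ }

    private void Abort() { /* roll back to the last committed segments_N */ }
}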