lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r659044 - /lucene/java/branches/lucene_2_3/src/java/org/apache/lucene/index/SegmentMerger.java
Date Thu, 22 May 2008 08:47:57 GMT
Author: mikemccand
Date: Thu May 22 01:47:57 2008
New Revision: 659044

URL: http://svn.apache.org/viewvc?rev=659044&view=rev
Log:
LUCENE-1282 (to 2.3): added workaround for nasty JRE bug that strikes when merging very large
segments; also added safety to abort the merge (preventing index corruption) if we detect
the bug has struck

Modified:
    lucene/java/branches/lucene_2_3/src/java/org/apache/lucene/index/SegmentMerger.java

Modified: lucene/java/branches/lucene_2_3/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_3/src/java/org/apache/lucene/index/SegmentMerger.java?rev=659044&r1=659043&r2=659044&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_3/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/java/branches/lucene_2_3/src/java/org/apache/lucene/index/SegmentMerger.java Thu May 22 01:47:57 2008
@@ -23,6 +23,7 @@
 import java.io.IOException;
 
 import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelectorResult;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexOutput;
@@ -316,7 +317,12 @@
                 if (checkAbort != null)
                   checkAbort.work(300*numDocs);
               } else {
-                fieldsWriter.addDocument(reader.document(j, fieldSelectorMerge));
+                // NOTE: it's very important to first assign
+                // to doc then pass it to
+                // fieldsWriter.addDocument; see
+                // LUCENE-1282
+                Document doc = reader.document(j, fieldSelectorMerge);
+                fieldsWriter.addDocument(doc);
                 j++;
                 docCount++;
                 if (checkAbort != null)
@@ -330,8 +336,15 @@
         fieldsWriter.close();
       }
 
-      assert docCount*8 == directory.fileLength(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION) :
-        "after mergeFields: fdx size mismatch: " + docCount + " docs vs " + directory.fileLength(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION) + " length in bytes of " + segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
+      final long fdxFileLength = directory.fileLength(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);
+
+      if (docCount*8 != fdxFileLength)
+        // This is most likely a bug in Sun JRE 1.6.0_04/_05;
+        // we detect that the bug has struck, here, and
+        // throw an exception to prevent the corruption from
+        // entering the index.  See LUCENE-1282 for
+        // details.
+        throw new RuntimeException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + "; now aborting this merge to prevent index corruption");
 
     } else
       // If we are skipping the doc stores, that means there
@@ -359,7 +372,12 @@
           // skip deleted docs
           if (reader.isDeleted(docNum)) 
             continue;
-          termVectorsWriter.addAllDocVectors(reader.getTermFreqVectors(docNum));
+          // NOTE: it's very important to first assign
+          // to vectors then pass it to
+          // termVectorsWriter.addAllDocVectors; see
+          // LUCENE-1282
+          TermFreqVector[] vectors = reader.getTermFreqVectors(docNum);
+          termVectorsWriter.addAllDocVectors(vectors);
           if (checkAbort != null)
             checkAbort.work(300);
         }
@@ -368,8 +386,15 @@
       termVectorsWriter.close();
     }
 
-    assert 4+mergedDocs*8 == directory.fileLength(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION) :
-      "after mergeVectors: tvx size mismatch: " + mergedDocs + " docs vs " + directory.fileLength(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION) + " length in bytes of " + segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
+    final long tvxSize = directory.fileLength(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
+
+    if (4+mergedDocs*8 != tvxSize)
+      // This is most likely a bug in Sun JRE 1.6.0_04/_05;
+      // we detect that the bug has struck, here, and
+      // throw an exception to prevent the corruption from
+      // entering the index.  See LUCENE-1282 for
+      // details.
+      throw new RuntimeException("mergeVectors produced an invalid result: mergedDocs is " + mergedDocs + " but tvx size is " + tvxSize + "; now aborting this merge to prevent index corruption");
   }
 
   private IndexOutput freqOutput = null;



Mime
View raw message