lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r620747 - in /lucene/java/branches/lucene_2_3: CHANGES.txt src/java/org/apache/lucene/index/DocumentsWriter.java src/java/org/apache/lucene/index/FieldsReader.java src/test/org/apache/lucene/index/TestIndexWriter.java
Date Tue, 12 Feb 2008 10:41:42 GMT
Author: mikemccand
Date: Tue Feb 12 02:41:37 2008
New Revision: 620747

URL: http://svn.apache.org/viewvc?rev=620747&view=rev
Log:
LUCENE-1168 (backport to 2.3 branch): Fixed corruption cases when autoCommit=false and documents
have mixed term vectors

Modified:
    lucene/java/branches/lucene_2_3/CHANGES.txt
    lucene/java/branches/lucene_2_3/src/java/org/apache/lucene/index/DocumentsWriter.java
    lucene/java/branches/lucene_2_3/src/java/org/apache/lucene/index/FieldsReader.java
    lucene/java/branches/lucene_2_3/src/test/org/apache/lucene/index/TestIndexWriter.java

Modified: lucene/java/branches/lucene_2_3/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_3/CHANGES.txt?rev=620747&r1=620746&r2=620747&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_3/CHANGES.txt (original)
+++ lucene/java/branches/lucene_2_3/CHANGES.txt Tue Feb 12 02:41:37 2008
@@ -1,6 +1,24 @@
 Lucene Change Log
 $Id$
 
+Changes in runtime behavior
+
+API Changes
+
+Bug fixes
+    
+ 1. LUCENE-1168: Fixed corruption cases when autoCommit=false and
+    documents have mixed term vectors (Suresh Guvvala via Mike
+    McCandless).
+	
+New features
+
+Optimizations
+
+Documentation
+
+Test Cases
+
 ======================= Release 2.3.0 2008-01-23 =======================
 
 Changes in runtime behavior

Modified: lucene/java/branches/lucene_2_3/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_3/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=620747&r1=620746&r2=620747&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_3/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/java/branches/lucene_2_3/src/java/org/apache/lucene/index/DocumentsWriter.java Tue Feb 12 02:41:37 2008
@@ -876,10 +876,12 @@
             tvf = directory.createOutput(docStoreSegment +  "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
             tvf.writeInt(TermVectorsReader.FORMAT_VERSION);
 
-            // We must "catch up" for all docIDs that had no
-            // vectors before this one
-            for(int i=0;i<docID;i++)
-              tvx.writeLong(0);
+            // We must "catch up" for all docs before us
+            // that had no vectors:
+            final long tvdPos = tvd.getFilePointer();
+            tvd.writeVInt(0);
+            for(int i=0;i<numDocsInStore-1;i++)
+              tvx.writeLong(tvdPos);
           } catch (Throwable t) {
             throw new AbortException(t, DocumentsWriter.this);
           }

Modified: lucene/java/branches/lucene_2_3/src/java/org/apache/lucene/index/FieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_3/src/java/org/apache/lucene/index/FieldsReader.java?rev=620747&r1=620746&r2=620747&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_3/src/java/org/apache/lucene/index/FieldsReader.java (original)
+++ lucene/java/branches/lucene_2_3/src/java/org/apache/lucene/index/FieldsReader.java Tue Feb 12 02:41:37 2008
@@ -201,7 +201,7 @@
     int count = 0;
     while (count < numDocs) {
       final long offset;
-      final int docID = startDocID + count + 1;
+      final int docID = docStoreOffset + startDocID + count + 1;
       assert docID <= numTotalDocs;
       if (docID < numTotalDocs) 
         offset = indexStream.readLong();

Modified: lucene/java/branches/lucene_2_3/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_3/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=620747&r1=620746&r2=620747&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_3/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/java/branches/lucene_2_3/src/test/org/apache/lucene/index/TestIndexWriter.java Tue Feb 12 02:41:37 2008
@@ -2656,4 +2656,141 @@
   public void testIOExceptionDuringWriteSegmentWithThreadsOnlyOnce() throws IOException {
     _testMultipleThreadsFailure(new FailOnlyInWriteSegment(true));
   }
+
+  // LUCENE-1168
+  public void testTermVectorCorruption() throws IOException {
+
+    Directory dir = new MockRAMDirectory();
+    for(int iter=0;iter<4;iter++) {
+      final boolean autoCommit = 1==iter/2;
+      IndexWriter writer = new IndexWriter(dir,
+                                           autoCommit, new StandardAnalyzer());
+      writer.setMaxBufferedDocs(2);
+      writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+      writer.setMergeScheduler(new SerialMergeScheduler());
+      writer.setMergePolicy(new LogDocMergePolicy());
+
+      Document document = new Document();
+
+      Field storedField = new Field("stored", "stored", Field.Store.YES,
+                                    Field.Index.NO);
+      document.add(storedField);
+      writer.addDocument(document);
+      writer.addDocument(document);
+
+      document = new Document();
+      document.add(storedField);
+      Field termVectorField = new Field("termVector", "termVector",
+                                        Field.Store.NO, Field.Index.UN_TOKENIZED,
+                                        Field.TermVector.WITH_POSITIONS_OFFSETS);
+
+      document.add(termVectorField);
+      writer.addDocument(document);
+      writer.optimize();
+      writer.close();
+
+      IndexReader reader = IndexReader.open(dir);
+      for(int i=0;i<reader.numDocs();i++) {
+        reader.document(i);
+        reader.getTermFreqVectors(i);
+      }
+      reader.close();
+
+      writer = new IndexWriter(dir,
+                               autoCommit, new StandardAnalyzer());
+      writer.setMaxBufferedDocs(2);
+      writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+      writer.setMergeScheduler(new SerialMergeScheduler());
+      writer.setMergePolicy(new LogDocMergePolicy());
+
+      Directory[] indexDirs = { dir};
+      writer.addIndexes(indexDirs);
+      writer.close();
+    }
+    dir.close();
+  }
+
+  // LUCENE-1168
+  public void testTermVectorCorruption2() throws IOException {
+    Directory dir = new MockRAMDirectory();
+    for(int iter=0;iter<4;iter++) {
+      final boolean autoCommit = 1==iter/2;
+      IndexWriter writer = new IndexWriter(dir,
+                                           autoCommit, new StandardAnalyzer());
+      writer.setMaxBufferedDocs(2);
+      writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+      writer.setMergeScheduler(new SerialMergeScheduler());
+      writer.setMergePolicy(new LogDocMergePolicy());
+
+      Document document = new Document();
+
+      Field storedField = new Field("stored", "stored", Field.Store.YES,
+                                    Field.Index.NO);
+      document.add(storedField);
+      writer.addDocument(document);
+      writer.addDocument(document);
+
+      document = new Document();
+      document.add(storedField);
+      Field termVectorField = new Field("termVector", "termVector",
+                                        Field.Store.NO, Field.Index.UN_TOKENIZED,
+                                        Field.TermVector.WITH_POSITIONS_OFFSETS);
+      document.add(termVectorField);
+      writer.addDocument(document);
+      writer.optimize();
+      writer.close();
+
+      IndexReader reader = IndexReader.open(dir);
+      assertTrue(reader.getTermFreqVectors(0)==null);
+      assertTrue(reader.getTermFreqVectors(1)==null);
+      assertTrue(reader.getTermFreqVectors(2)!=null);
+      reader.close();
+    }
+    dir.close();
+  }
+
+  // LUCENE-1168
+  public void testTermVectorCorruption3() throws IOException {
+    Directory dir = new MockRAMDirectory();
+    IndexWriter writer = new IndexWriter(dir,
+                                         false, new StandardAnalyzer());
+    writer.setMaxBufferedDocs(2);
+    writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+    writer.setMergeScheduler(new SerialMergeScheduler());
+    writer.setMergePolicy(new LogDocMergePolicy());
+
+    Document document = new Document();
+
+    document = new Document();
+    Field storedField = new Field("stored", "stored", Field.Store.YES,
+                                  Field.Index.NO);
+    document.add(storedField);
+    Field termVectorField = new Field("termVector", "termVector",
+                                      Field.Store.NO, Field.Index.UN_TOKENIZED,
+                                      Field.TermVector.WITH_POSITIONS_OFFSETS);
+    document.add(termVectorField);
+    for(int i=0;i<10;i++)
+      writer.addDocument(document);
+    writer.close();
+
+    writer = new IndexWriter(dir,
+                             false, new StandardAnalyzer());
+    writer.setMaxBufferedDocs(2);
+    writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+    writer.setMergeScheduler(new SerialMergeScheduler());
+    writer.setMergePolicy(new LogDocMergePolicy());
+    for(int i=0;i<6;i++)
+      writer.addDocument(document);
+
+    writer.optimize();
+    writer.close();
+
+    IndexReader reader = IndexReader.open(dir);
+    for(int i=0;i<10;i++) {
+      reader.getTermFreqVectors(i);
+      reader.document(i);
+    }
+    reader.close();
+    dir.close();
+  }
 }



Mime
View raw message