lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r619640 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/index/ src/test/org/apache/lucene/index/
Date Thu, 07 Feb 2008 21:13:38 GMT
Author: mikemccand
Date: Thu Feb  7 13:13:36 2008
New Revision: 619640

URL: http://svn.apache.org/viewvc?rev=619640&view=rev
Log:
LUCENE-1168: fix corruption cases with mixed term vectors and autoCommit=false

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java
    lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java
    lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=619640&r1=619639&r2=619640&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Thu Feb  7 13:13:36 2008
@@ -44,6 +44,10 @@
  2. LUCENE-1163: Fixed bug in CharArraySet.contains(char[] buffer, int
     offset, int len) that was ignoring offset and thus giving the
     wrong answer.  (Thomas Peuss via Mike McCandless)
+
+ 3. LUCENE-1168: Fixed corruption cases when autoCommit=false and
+    documents have mixed term vectors (Suresh Guvvala via Mike
+    McCandless).
 	
 New features
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=619640&r1=619639&r2=619640&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java Thu Feb  7 13:13:36 2008
@@ -876,10 +876,12 @@
             tvf = directory.createOutput(docStoreSegment +  "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
             tvf.writeInt(TermVectorsReader.FORMAT_VERSION2);
 
-            // We must "catch up" for all docIDs that had no
-            // vectors before this one
-            for(int i=0;i<docID;i++) {
-              tvx.writeLong(0);
+            // We must "catch up" for all docs before us
+            // that had no vectors:
+            final long tvdPos = tvd.getFilePointer();
+            tvd.writeVInt(0);
+            for(int i=0;i<numDocsInStore-1;i++) {
+              tvx.writeLong(tvdPos);
               tvx.writeLong(0);
             }
           } catch (Throwable t) {

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java?rev=619640&r1=619639&r2=619640&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java Thu Feb  7 13:13:36 2008
@@ -201,7 +201,7 @@
     int count = 0;
     while (count < numDocs) {
       final long offset;
-      final int docID = startDocID + count + 1;
+      final int docID = docStoreOffset + startDocID + count + 1;
       assert docID <= numTotalDocs;
       if (docID < numTotalDocs) 
         offset = indexStream.readLong();

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java?rev=619640&r1=619639&r2=619640&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java Thu Feb  7 13:13:36 2008
@@ -168,7 +168,8 @@
 
     int count = 0;
     while (count < numDocs) {
-      final int docID = startDocID + count + 1;
+      final int docID = docStoreOffset + startDocID + count + 1;
+      assert docID <= numTotalDocs;
       if (docID < numTotalDocs)  {
         tvdPosition = tvx.readLong();
         tvfPosition = tvx.readLong();

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=619640&r1=619639&r2=619640&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java Thu Feb  7 13:13:36 2008
@@ -2682,6 +2682,148 @@
     dir.close();
   }
 
+  // LUCENE-1168
+  public void testTermVectorCorruption() throws IOException {
+
+    Directory dir = new MockRAMDirectory();
+    for(int iter=0;iter<4;iter++) {
+      final boolean autoCommit = 1==iter/2;
+      IndexWriter writer = new IndexWriter(dir,
+                                           autoCommit, new StandardAnalyzer(),
+                                           IndexWriter.MaxFieldLength.LIMITED);
+      writer.setMaxBufferedDocs(2);
+      writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+      writer.setMergeScheduler(new SerialMergeScheduler());
+      writer.setMergePolicy(new LogDocMergePolicy());
+
+      Document document = new Document();
+
+      Field storedField = new Field("stored", "stored", Field.Store.YES,
+                                    Field.Index.NO);
+      document.add(storedField);
+      writer.addDocument(document);
+      writer.addDocument(document);
+
+      document = new Document();
+      document.add(storedField);
+      Field termVectorField = new Field("termVector", "termVector",
+                                        Field.Store.NO, Field.Index.UN_TOKENIZED,
+                                        Field.TermVector.WITH_POSITIONS_OFFSETS);
+
+      document.add(termVectorField);
+      writer.addDocument(document);
+      writer.optimize();
+      writer.close();
+
+      IndexReader reader = IndexReader.open(dir);
+      for(int i=0;i<reader.numDocs();i++) {
+        reader.document(i);
+        reader.getTermFreqVectors(i);
+      }
+      reader.close();
+
+      writer = new IndexWriter(dir,
+                               autoCommit, new StandardAnalyzer(),
+                               IndexWriter.MaxFieldLength.LIMITED);
+      writer.setMaxBufferedDocs(2);
+      writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+      writer.setMergeScheduler(new SerialMergeScheduler());
+      writer.setMergePolicy(new LogDocMergePolicy());
+
+      Directory[] indexDirs = { dir};
+      writer.addIndexes(indexDirs);
+      writer.close();
+    }
+    dir.close();
+  }
+
+  // LUCENE-1168
+  public void testTermVectorCorruption2() throws IOException {
+    Directory dir = new MockRAMDirectory();
+    for(int iter=0;iter<4;iter++) {
+      final boolean autoCommit = 1==iter/2;
+      IndexWriter writer = new IndexWriter(dir,
+                                           autoCommit, new StandardAnalyzer(),
+                                           IndexWriter.MaxFieldLength.LIMITED);
+      writer.setMaxBufferedDocs(2);
+      writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+      writer.setMergeScheduler(new SerialMergeScheduler());
+      writer.setMergePolicy(new LogDocMergePolicy());
+
+      Document document = new Document();
+
+      Field storedField = new Field("stored", "stored", Field.Store.YES,
+                                    Field.Index.NO);
+      document.add(storedField);
+      writer.addDocument(document);
+      writer.addDocument(document);
+
+      document = new Document();
+      document.add(storedField);
+      Field termVectorField = new Field("termVector", "termVector",
+                                        Field.Store.NO, Field.Index.UN_TOKENIZED,
+                                        Field.TermVector.WITH_POSITIONS_OFFSETS);
+      document.add(termVectorField);
+      writer.addDocument(document);
+      writer.optimize();
+      writer.close();
+
+      IndexReader reader = IndexReader.open(dir);
+      assertTrue(reader.getTermFreqVectors(0)==null);
+      assertTrue(reader.getTermFreqVectors(1)==null);
+      assertTrue(reader.getTermFreqVectors(2)!=null);
+      reader.close();
+    }
+    dir.close();
+  }
+
+  // LUCENE-1168
+  public void testTermVectorCorruption3() throws IOException {
+    Directory dir = new MockRAMDirectory();
+    IndexWriter writer = new IndexWriter(dir,
+                                         false, new StandardAnalyzer(),
+                                         IndexWriter.MaxFieldLength.LIMITED);
+    writer.setMaxBufferedDocs(2);
+    writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+    writer.setMergeScheduler(new SerialMergeScheduler());
+    writer.setMergePolicy(new LogDocMergePolicy());
+
+    Document document = new Document();
+
+    document = new Document();
+    Field storedField = new Field("stored", "stored", Field.Store.YES,
+                                  Field.Index.NO);
+    document.add(storedField);
+    Field termVectorField = new Field("termVector", "termVector",
+                                      Field.Store.NO, Field.Index.UN_TOKENIZED,
+                                      Field.TermVector.WITH_POSITIONS_OFFSETS);
+    document.add(termVectorField);
+    for(int i=0;i<10;i++)
+      writer.addDocument(document);
+    writer.close();
+
+    writer = new IndexWriter(dir,
+                             false, new StandardAnalyzer(),
+                             IndexWriter.MaxFieldLength.LIMITED);
+    writer.setMaxBufferedDocs(2);
+    writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+    writer.setMergeScheduler(new SerialMergeScheduler());
+    writer.setMergePolicy(new LogDocMergePolicy());
+    for(int i=0;i<6;i++)
+      writer.addDocument(document);
+
+    writer.optimize();
+    writer.close();
+
+    IndexReader reader = IndexReader.open(dir);
+    for(int i=0;i<10;i++) {
+      reader.getTermFreqVectors(i);
+      reader.document(i);
+    }
+    reader.close();
+    dir.close();
+  }
+
   // LUCENE-1084: test user-specified field length
   public void testUserSpecifiedMaxFieldLength() throws IOException {
     Directory dir = new MockRAMDirectory();



Mime
View raw message