lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sim...@apache.org
Subject lucene-solr:branch_7x: LUCENE-8290: Keep soft deletes in sync with on-disk DocValues
Date Thu, 03 May 2018 11:39:25 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/branch_7x 0c89db842 -> 8fdd3d758


LUCENE-8290: Keep soft deletes in sync with on-disk DocValues

Today we pass on the doc values update to the PendingDeletes
when it's applied. This might cause issues with a rentention policy
merge policy that will see a deleted document but not it's value on
disk.
This change moves back the PendingDeletes callback to flush time
in order to be consistent with what is actually updated on disk.

This change also makes sure we write values to disk on flush that
are in the reader pool as well as extra best effort checks to drop
fully deleted segments on flush, commit and getReader.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/8fdd3d75
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/8fdd3d75
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/8fdd3d75

Branch: refs/heads/branch_7x
Commit: 8fdd3d7584bcc23442d6256cca94da0dbf2ccc10
Parents: 0c89db8
Author: Simon Willnauer <simonw@apache.org>
Authored: Wed May 2 16:17:15 2018 +0200
Committer: Simon Willnauer <simonw@apache.org>
Committed: Thu May 3 13:39:15 2018 +0200

----------------------------------------------------------------------
 .../org/apache/lucene/index/IndexWriter.java    | 51 ++++++++++++++------
 .../org/apache/lucene/index/PendingDeletes.java | 13 ++---
 .../apache/lucene/index/PendingSoftDeletes.java |  9 +---
 .../apache/lucene/index/ReadersAndUpdates.java  |  3 +-
 .../lucene/index/TestPendingSoftDeletes.java    |  9 ++--
 .../TestSoftDeletesRetentionMergePolicy.java    |  9 ++--
 6 files changed, 51 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8fdd3d75/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index d4d08f9..3846124 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -510,16 +510,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable
{
 
             // TODO: we could instead just clone SIS and pull/incref readers in sync'd block,
and then do this w/o IW's lock?
             // Must do this sync'd on IW to prevent a merge from completing at the last second
and failing to write its DV updates:
-            if (readerPool.writeAllDocValuesUpdates()) {
-              checkpoint();
-            }
-
-            if (writeAllDeletes) {
-              // Must move the deletes to disk:
-              if (readerPool.commit(segmentInfos)) {
-                checkpointNoSIS();
-              }
-            }
+           writeReaderPool(writeAllDeletes);
 
             // Prevent segmentInfos from changing while opening the
             // reader; in theory we could instead do similar retry logic,
@@ -3134,11 +3125,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable
{
 
             applyAllDeletesAndUpdates();
             synchronized(this) {
-
-              if (readerPool.commit(segmentInfos)) {
-                checkpointNoSIS();
-              }
-
+              writeReaderPool(true);
               if (changeCount.get() != lastCommitChangeCount) {
                 // There are changes to commit, so we will write a new segments_N in startCommit.
                 // The act of committing is itself an NRT-visible change (an NRT reader that
was
@@ -3219,6 +3206,39 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable
{
       }
     }
   }
+
+  /**
+   * Ensures that all changes in the reader-pool are written to disk.
+   * @param writeDeletes if <code>true</code> if deletes should be written to
disk too.
+   */
+  private final void writeReaderPool(boolean writeDeletes) throws IOException {
+    assert Thread.holdsLock(this);
+    if (writeDeletes) {
+      if (readerPool.commit(segmentInfos)) {
+        checkpointNoSIS();
+      }
+    } else { // only write the docValues
+      if (readerPool.writeAllDocValuesUpdates()) {
+        checkpoint();
+      }
+    }
+    // now do some best effort to check if a segment is fully deleted
+    List<SegmentCommitInfo> toDrop = new ArrayList<>(); // don't modify segmentInfos
in-place
+    for (SegmentCommitInfo info : segmentInfos) {
+      ReadersAndUpdates readersAndUpdates = readerPool.get(info, false);
+      if (readersAndUpdates != null) {
+        if (isFullyDeleted(readersAndUpdates)) {
+          toDrop.add(info);
+        }
+      }
+    }
+    for (SegmentCommitInfo info : toDrop) {
+      dropDeletedSegment(info);
+    }
+    if (toDrop.isEmpty() == false) {
+      checkpoint();
+    }
+  }
   
   /**
    * Sets the iterator to provide the commit user data map at commit time.  Calling this
method
@@ -3505,6 +3525,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable
{
       anyChanges |= maybeMerge.getAndSet(false);
       
       synchronized(this) {
+        writeReaderPool(applyAllDeletes);
         doAfterFlush();
         success = true;
         return anyChanges;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8fdd3d75/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java b/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java
index ae91be8..1878665 100644
--- a/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java
+++ b/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java
@@ -235,18 +235,11 @@ class PendingDeletes {
   }
 
   /**
-   * Called before the given DocValuesFieldUpdates are written to disk
-   * @param info the field to apply
-   */
-  void onDocValuesUpdate(FieldInfo info) {
-  }
-
-  /**
-   * Called for every field update for the given field
-   * @param field the field that's updated
+   * Called for every field update for the given field at flush time
+   * @param info the field info of the field that's updated
    * @param iterator the values to apply
    */
-  void onDocValuesUpdate(String field, DocValuesFieldUpdates.Iterator iterator) throws IOException
{
+  void onDocValuesUpdate(FieldInfo info, DocValuesFieldUpdates.Iterator iterator) throws
IOException {
   }
 
   int numDeletesToMerge(MergePolicy policy, IOSupplier<CodecReader> readerIOSupplier)
throws IOException {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8fdd3d75/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java b/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java
index a5e7b14..eae25e0 100644
--- a/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java
+++ b/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java
@@ -120,14 +120,9 @@ final class PendingSoftDeletes extends PendingDeletes {
   }
 
   @Override
-  void onDocValuesUpdate(String field, DocValuesFieldUpdates.Iterator iterator) throws IOException
{
-    if (this.field.equals(field)) {
+  void onDocValuesUpdate(FieldInfo info, DocValuesFieldUpdates.Iterator iterator) throws
IOException {
+    if (this.field.equals(info.name)) {
       pendingDeleteCount += applySoftDeletes(iterator, getMutableBits());
-    }
-  }
-  @Override
-  void onDocValuesUpdate(FieldInfo info) {
-    if (field.equals(info.name)) {
       assert dvGeneration < info.getDocValuesGen() : "we have seen this generation update
already: " + dvGeneration + " vs. " + info.getDocValuesGen();
       assert dvGeneration != -2 : "docValues generation is still uninitialized";
       dvGeneration = info.getDocValuesGen();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8fdd3d75/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
index b31bc49..6e074ee 100644
--- a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
+++ b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
@@ -161,7 +161,6 @@ final class ReadersAndUpdates {
       }
       fieldUpdates.add(update);
     }
-    pendingDeletes.onDocValuesUpdate(update.field, update.iterator());
   }
 
   public synchronized long getNumDVUpdates() {
@@ -334,7 +333,6 @@ final class ReadersAndUpdates {
       final TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);
       final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info,
fieldInfos, null, updatesContext, segmentSuffix);
       try (final DocValuesConsumer fieldsConsumer = dvFormat.fieldsConsumer(state)) {
-        pendingDeletes.onDocValuesUpdate(fieldInfo);
         Function<FieldInfo, DocValuesFieldUpdates.Iterator> updateSupplier = (info)
-> {
           if (info != fieldInfo) {
             throw new IllegalArgumentException("expected field info for field: " + fieldInfo.name
+ " but got: " + info.name);
@@ -345,6 +343,7 @@ final class ReadersAndUpdates {
           }
           return  DocValuesFieldUpdates.mergedIterator(subs);
         };
+        pendingDeletes.onDocValuesUpdate(fieldInfo, updateSupplier.apply(fieldInfo));
         if (type == DocValuesType.BINARY) {
           fieldsConsumer.addBinaryField(fieldInfo, new EmptyDocValuesProducer() {
             @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8fdd3d75/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java b/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java
index 8bd7ca6..7ddf0be 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java
@@ -116,9 +116,8 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
     List<Integer> docsDeleted = Arrays.asList(1, 3, 7, 8, DocIdSetIterator.NO_MORE_DOCS);
     List<DocValuesFieldUpdates> updates = Arrays.asList(singleUpdate(docsDeleted, 10));
     for (DocValuesFieldUpdates update : updates) {
-      deletes.onDocValuesUpdate(update.field, update.iterator());
+      deletes.onDocValuesUpdate(fieldInfo, update.iterator());
     }
-    deletes.onDocValuesUpdate(fieldInfo);
     assertEquals(4, deletes.numPendingDeletes());
     assertTrue(deletes.getLiveDocs().get(0));
     assertFalse(deletes.getLiveDocs().get(1));
@@ -135,9 +134,8 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
     updates = Arrays.asList(singleUpdate(docsDeleted, 10));
     fieldInfo = new FieldInfo("_soft_deletes", 1, false, false, false, IndexOptions.NONE,
DocValuesType.NUMERIC, 1, Collections.emptyMap(), 0, 0);
     for (DocValuesFieldUpdates update : updates) {
-      deletes.onDocValuesUpdate(update.field, update.iterator());
+      deletes.onDocValuesUpdate(fieldInfo, update.iterator());
     }
-    deletes.onDocValuesUpdate(fieldInfo);
     assertEquals(5, deletes.numPendingDeletes());
     assertTrue(deletes.getLiveDocs().get(0));
     assertFalse(deletes.getLiveDocs().get(1));
@@ -180,9 +178,8 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
     List<Integer> docsDeleted = Arrays.asList(1, DocIdSetIterator.NO_MORE_DOCS);
     List<DocValuesFieldUpdates> updates = Arrays.asList(singleUpdate(docsDeleted, 3));
     for (DocValuesFieldUpdates update : updates) {
-      deletes.onDocValuesUpdate(update.field, update.iterator());
+      deletes.onDocValuesUpdate(fieldInfo, update.iterator());
     }
-    deletes.onDocValuesUpdate(fieldInfo);
     assertEquals(1, deletes.numPendingDeletes());
     assertTrue(deletes.getLiveDocs().get(0));
     assertFalse(deletes.getLiveDocs().get(1));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8fdd3d75/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesRetentionMergePolicy.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesRetentionMergePolicy.java
b/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesRetentionMergePolicy.java
index b868a2e..5f1ba6c 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesRetentionMergePolicy.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesRetentionMergePolicy.java
@@ -289,6 +289,9 @@ public class TestSoftDeletesRetentionMergePolicy extends LuceneTestCase
{
               writer.softUpdateDocument(new Term("id", id), doc,
                   new NumericDocValuesField("soft_delete", 1));
             }
+            if (rarely()) {
+              writer.flush();
+            }
             ids.add(id);
           }
         } catch (IOException | InterruptedException e) {
@@ -382,13 +385,13 @@ public class TestSoftDeletesRetentionMergePolicy extends LuceneTestCase
{
     Document tombstone = new Document();
     tombstone.add(new NumericDocValuesField("soft_delete", 1));
     writer.softUpdateDocument(new Term("id", "1"), tombstone, new NumericDocValuesField("soft_delete",
1));
-    // Internally, forceMergeDeletes will call flush to flush pending updates
+    writer.forceMergeDeletes(true); // Internally, forceMergeDeletes will call flush to flush
pending updates
     // Thus, we will have two segments - both having soft-deleted documents.
     // We expect any MP to merge these segments into one segment
     // when calling forceMergeDeletes.
-    writer.forceMergeDeletes(true);
-    assertEquals(1, writer.maxDoc());
     assertEquals(1, writer.segmentInfos.asList().size());
+    assertEquals(1, writer.numDocs());
+    assertEquals(1, writer.maxDoc());
     writer.close();
     dir.close();
   }


Mime
View raw message