From commits-return-102004-archive-asf-public=cust-asf.ponee.io@lucene.apache.org Wed Jul 4 20:50:30 2018 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id 1A4BE180608 for ; Wed, 4 Jul 2018 20:50:28 +0200 (CEST) Received: (qmail 44849 invoked by uid 500); 4 Jul 2018 18:50:28 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 44840 invoked by uid 99); 4 Jul 2018 18:50:28 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 04 Jul 2018 18:50:28 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id B12A0DFC42; Wed, 4 Jul 2018 18:50:27 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: dnhatn@apache.org To: commits@lucene.apache.org Message-Id: <131990b4110b4b17820857bb2b1a8fab@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: lucene-solr:branch_7_4: LUCENE-8381: Do not count hard-deletes as soft-deletes in merges Date: Wed, 4 Jul 2018 18:50:27 +0000 (UTC) Repository: lucene-solr Updated Branches: refs/heads/branch_7_4 1cd10c108 -> a35f18e56 LUCENE-8381: Do not count hard-deletes as soft-deletes in merges Today, if a document is soft-deleted and then hard-deleted, IW still considers that doc as soft-deleted when wrapping readers for merges. This change makes sure that IW excludes the hard-deleted documents from a merge reader, and does not count them as soft-deletes.
Co-authored-by: Simon Willnauer Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/a35f18e5 Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/a35f18e5 Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/a35f18e5 Branch: refs/heads/branch_7_4 Commit: a35f18e56b7c178cdd90bce1ea873d328683fd1c Parents: 1cd10c1 Author: Nhat Nguyen Authored: Tue Jul 3 22:56:46 2018 -0400 Committer: Nhat Nguyen Committed: Wed Jul 4 14:44:20 2018 -0400 ---------------------------------------------------------------------- .../apache/lucene/index/FilterCodecReader.java | 23 ++++++++++ .../org/apache/lucene/index/IndexWriter.java | 44 ++++++++++++++++---- .../index/SoftDeletesRetentionMergePolicy.java | 35 ++-------------- .../TestSoftDeletesRetentionMergePolicy.java | 38 +++++++++++++++++ 4 files changed, 101 insertions(+), 39 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a35f18e5/lucene/core/src/java/org/apache/lucene/index/FilterCodecReader.java ---------------------------------------------------------------------- diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterCodecReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterCodecReader.java index fd36ecb..4187051 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FilterCodecReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/FilterCodecReader.java @@ -127,4 +127,27 @@ public abstract class FilterCodecReader extends CodecReader { in.checkIntegrity(); } + /** + * Returns a filtered codec reader with the given live docs and numDocs. 
+ */ + static FilterCodecReader wrapLiveDocs(CodecReader reader, Bits liveDocs, int numDocs) { + return new FilterCodecReader(reader) { + @Override + public CacheHelper getCoreCacheHelper() { + return reader.getCoreCacheHelper(); + } + @Override + public CacheHelper getReaderCacheHelper() { + return null; // we are altering live docs + } + @Override + public Bits getLiveDocs() { + return liveDocs; + } + @Override + public int numDocs() { + return numDocs; + } + }; + } } http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a35f18e5/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java ---------------------------------------------------------------------- diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index c4c5c57..e8ed2bb 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -48,6 +48,7 @@ import org.apache.lucene.index.DocValuesUpdate.BinaryDocValuesUpdate; import org.apache.lucene.index.DocValuesUpdate.NumericDocValuesUpdate; import org.apache.lucene.index.FieldInfos.FieldNumbers; import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; @@ -4384,25 +4385,52 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable, // Let the merge wrap readers List mergeReaders = new ArrayList<>(); - int numSoftDeleted = 0; - for (SegmentReader reader : merge.readers) { + int softDeleteCount = 0; + for (int r = 0; r < merge.readers.size(); r++) { + SegmentReader reader = merge.readers.get(r); CodecReader wrappedReader = merge.wrapForMerge(reader); validateMergeReader(wrappedReader); - mergeReaders.add(wrappedReader); if (softDeletesEnabled) { if (reader != 
wrappedReader) { // if we don't have a wrapped reader we won't preserve any soft-deletes - Bits liveDocs = wrappedReader.getLiveDocs(); - numSoftDeleted += PendingSoftDeletes.countSoftDeletes( - DocValuesFieldExistsQuery.getDocValuesDocIdSetIterator(config.getSoftDeletesField(), wrappedReader), - liveDocs); + Bits hardLiveDocs = merge.hardLiveDocs.get(r); + Bits wrappedLiveDocs = wrappedReader.getLiveDocs(); + int hardDeleteCount = 0; + DocIdSetIterator softDeletedDocs = DocValuesFieldExistsQuery.getDocValuesDocIdSetIterator(config.getSoftDeletesField(), wrappedReader); + if (softDeletedDocs != null) { + int docId; + while ((docId = softDeletedDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + if (wrappedLiveDocs == null || wrappedLiveDocs.get(docId)) { + if (hardLiveDocs == null || hardLiveDocs.get(docId)) { + softDeleteCount++; + } else { + hardDeleteCount++; + } + } + } + } + // Wrap the wrapped reader again if we have excluded some hard-deleted docs + if (hardLiveDocs != null && hardDeleteCount > 0) { + Bits liveDocs = wrappedLiveDocs == null ? 
hardLiveDocs : new Bits() { + @Override + public boolean get(int index) { + return hardLiveDocs.get(index) && wrappedLiveDocs.get(index); + } + @Override + public int length() { + return hardLiveDocs.length(); + } + }; + wrappedReader = FilterCodecReader.wrapLiveDocs(wrappedReader, liveDocs, wrappedReader.numDocs() - hardDeleteCount); + } } } + mergeReaders.add(wrappedReader); } final SegmentMerger merger = new SegmentMerger(mergeReaders, merge.info.info, infoStream, dirWrapper, globalFieldNumberMap, context); - merge.info.setSoftDelCount(numSoftDeleted); + merge.info.setSoftDelCount(softDeleteCount); merge.checkAborted(); merge.mergeStartNS = System.nanoTime(); http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a35f18e5/lucene/core/src/java/org/apache/lucene/index/SoftDeletesRetentionMergePolicy.java ---------------------------------------------------------------------- diff --git a/lucene/core/src/java/org/apache/lucene/index/SoftDeletesRetentionMergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/SoftDeletesRetentionMergePolicy.java index 36a2208..182af50 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SoftDeletesRetentionMergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/SoftDeletesRetentionMergePolicy.java @@ -73,7 +73,7 @@ public final class SoftDeletesRetentionMergePolicy extends OneMergeWrappingMerge public boolean keepFullyDeletedSegment(IOSupplier readerIOSupplier) throws IOException { CodecReader reader = readerIOSupplier.get(); /* we only need a single hit to keep it no need for soft deletes to be checked*/ - Scorer scorer = getScorer(retentionQuerySupplier.get(), wrapLiveDocs(reader, null, reader.maxDoc())); + Scorer scorer = getScorer(retentionQuerySupplier.get(), FilterCodecReader.wrapLiveDocs(reader, null, reader.maxDoc())); if (scorer != null) { DocIdSetIterator iterator = scorer.iterator(); boolean atLeastOneHit = iterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS; @@ -88,7 +88,7 @@ public final class 
SoftDeletesRetentionMergePolicy extends OneMergeWrappingMerge if (liveDocs == null) { // no deletes - just keep going return reader; } - CodecReader wrappedReader = wrapLiveDocs(reader, new Bits() { // only search deleted + CodecReader wrappedReader = FilterCodecReader.wrapLiveDocs(reader, new Bits() { // only search deleted @Override public boolean get(int index) { return liveDocs.get(index) == false; @@ -114,7 +114,7 @@ public final class SoftDeletesRetentionMergePolicy extends OneMergeWrappingMerge } } assert reader.numDocs() + numExtraLiveDocs <= reader.maxDoc() : "numDocs: " + reader.numDocs() + " numExtraLiveDocs: " + numExtraLiveDocs + " maxDoc: " + reader.maxDoc(); - return wrapLiveDocs(reader, cloneLiveDocs, reader.numDocs() + numExtraLiveDocs); + return FilterCodecReader.wrapLiveDocs(reader, cloneLiveDocs, reader.numDocs() + numExtraLiveDocs); } else { return reader; } @@ -144,33 +144,6 @@ public final class SoftDeletesRetentionMergePolicy extends OneMergeWrappingMerge return weight.scorer(reader.getContext()); } - /** - * Returns a codec reader with the given live docs - */ - private static CodecReader wrapLiveDocs(CodecReader reader, Bits liveDocs, int numDocs) { - return new FilterCodecReader(reader) { - @Override - public CacheHelper getCoreCacheHelper() { - return reader.getCoreCacheHelper(); - } - - @Override - public CacheHelper getReaderCacheHelper() { - return null; // we are altering live docs - } - - @Override - public Bits getLiveDocs() { - return liveDocs; - } - - @Override - public int numDocs() { - return numDocs; - } - }; - } - @Override public int numDeletesToMerge(SegmentCommitInfo info, int delCount, IOSupplier readerSupplier) throws IOException { final int numDeletesToMerge = super.numDeletesToMerge(info, delCount, readerSupplier); @@ -180,7 +153,7 @@ public final class SoftDeletesRetentionMergePolicy extends OneMergeWrappingMerge BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add(new 
DocValuesFieldExistsQuery(field), BooleanClause.Occur.FILTER); builder.add(retentionQuerySupplier.get(), BooleanClause.Occur.FILTER); - Scorer scorer = getScorer(builder.build(), wrapLiveDocs(reader, null, reader.maxDoc())); + Scorer scorer = getScorer(builder.build(), FilterCodecReader.wrapLiveDocs(reader, null, reader.maxDoc())); if (scorer != null) { DocIdSetIterator iterator = scorer.iterator(); Bits liveDocs = reader.getLiveDocs(); http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a35f18e5/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesRetentionMergePolicy.java ---------------------------------------------------------------------- diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesRetentionMergePolicy.java b/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesRetentionMergePolicy.java index be1f7ac..bb36ba3 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesRetentionMergePolicy.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesRetentionMergePolicy.java @@ -569,6 +569,44 @@ public class TestSoftDeletesRetentionMergePolicy extends LuceneTestCase { IOUtils.close(writer, dir); } + public void testMergeSoftDeleteAndHardDelete() throws Exception { + Directory dir = newDirectory(); + String softDelete = "soft_delete"; + IndexWriterConfig config = newIndexWriterConfig() + .setSoftDeletesField(softDelete) + .setMergePolicy(new SoftDeletesRetentionMergePolicy("soft_delete", + MatchAllDocsQuery::new, new LogDocMergePolicy())); + config.setReaderPooling(true); + IndexWriter writer = new IndexWriter(dir, config); + Document d = new Document(); + d.add(new StringField("id", "0", Field.Store.YES)); + writer.addDocument(d); + d = new Document(); + d.add(new StringField("id", "1", Field.Store.YES)); + d.add(new NumericDocValuesField("soft_delete", 1)); + writer.addDocument(d); + try (DirectoryReader reader = writer.getReader()) { + assertEquals(2, reader.maxDoc()); + assertEquals(1, 
reader.numDocs()); + } + while (true) { + try (DirectoryReader reader = writer.getReader()) { + TopDocs topDocs = new IndexSearcher(new NoDeletesWrapper(reader)).search(new TermQuery(new Term("id", "1")), 1); + assertEquals(1, topDocs.totalHits); + if (writer.tryDeleteDocument(reader, topDocs.scoreDocs[0].doc) > 0) { + break; + } + } + } + writer.forceMergeDeletes(true); + assertEquals(1, writer.segmentInfos.size()); + SegmentCommitInfo si = writer.segmentInfos.info(0); + assertEquals(0, si.getSoftDelCount()); // hard-delete should supersede the soft-delete + assertEquals(0, si.getDelCount()); + assertEquals(1, si.info.maxDoc()); + IOUtils.close(writer, dir); + } + static void doUpdate(Term doc, IndexWriter writer, Field... fields) throws IOException { long seqId = -1; do { // retry if we just committing a merge