lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dor...@apache.org
Subject svn commit: r606614 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/search/Hits.java src/test/org/apache/lucene/search/TestSearchHitsWithDeletions.java
Date Sun, 23 Dec 2007 20:50:30 GMT
Author: doronc
Date: Sun Dec 23 12:50:29 2007
New Revision: 606614

URL: http://svn.apache.org/viewvc?rev=606614&view=rev
Log:
LUCENE-1096: Fixed Hits behavior when hits' docs are deleted along with iterating the hits.

Added:
    lucene/java/trunk/src/test/org/apache/lucene/search/TestSearchHitsWithDeletions.java 
 (with props)
Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/search/Hits.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=606614&r1=606613&r2=606614&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Sun Dec 23 12:50:29 2007
@@ -189,6 +189,14 @@
 25. LUCENE-1042: Remove throwing of IOException in getTermFreqVector(int, String, TermVectorMapper)
to be consistent
     with other getTermFreqVector calls.  Also removed the throwing of the other IOException
in that method to be consistent.  (Karl Wettin via Grant Ingersoll)
     
+26. LUCENE-1096: Fixed Hits behavior when hits' docs are deleted 
+    along with iterating the hits. Deleting docs already retrieved 
+    now works seamlessly. If docs not yet retrieved are deleted 
+    (e.g. from another thread), and then, relying on the initial 
+    Hits.length(), an application attempts to retrieve more hits 
+    than actually exist , a ConcurrentMidificationException 
+    is thrown.  (Doron Cohen)
+    
     
 New features
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/Hits.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/Hits.java?rev=606614&r1=606613&r2=606614&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/Hits.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/Hits.java Sun Dec 23 12:50:29 2007
@@ -18,6 +18,7 @@
  */
 
 import java.io.IOException;
+import java.util.ConcurrentModificationException;
 import java.util.Vector;
 import java.util.Iterator;
 
@@ -31,6 +32,12 @@
  * performance issues. If you need to iterate over many or all hits, consider
  * using the search method that takes a {@link HitCollector}.
  * </p>
+ * <p><b>Note:</b> Deleting matching documents concurrently with traversing

+ * the hits, might, when deleting hits that were not yet retrieved, decrease
+ * {@link #length()}. In such case, 
+ * {@link java.util.ConcurrentModificationException ConcurrentModificationException}
+ * is thrown when accessing hit <code>n</code> &ge; current_{@link #length()}

+ * (but <code>n</code> &lt; {@link #length()}_at_start). 
  */
 public final class Hits {
   private Weight weight;
@@ -45,12 +52,20 @@
   private HitDoc last;          // tail of LRU cache
   private int numDocs = 0;      // number cached
   private int maxDocs = 200;    // max to cache
+  
+  private int nDeletions;       // # deleted docs in the index.    
+  private int lengthAtStart;    // this is the number apps usually count on (although deletions
can bring it down). 
+  private int nDeletedHits = 0; // # of already collected hits that were meanwhile deleted.
+
+  boolean debugCheckedForDeletions = false; // for test purposes.
 
   Hits(Searcher s, Query q, Filter f) throws IOException {
     weight = q.weight(s);
     searcher = s;
     filter = f;
+    nDeletions = countDeletions(s);
     getMoreDocs(50); // retrieve 100 initially
+    lengthAtStart = length;
   }
 
   Hits(Searcher s, Query q, Filter f, Sort o) throws IOException {
@@ -58,7 +73,18 @@
     searcher = s;
     filter = f;
     sort = o;
+    nDeletions = countDeletions(s);
     getMoreDocs(50); // retrieve 100 initially
+    lengthAtStart = length;
+  }
+
+  // count # deletions, return -1 if unknown.
+  private int countDeletions(Searcher s) throws IOException {
+    int cnt = -1;
+    if (s instanceof IndexSearcher) {
+      cnt = s.maxDoc() - ((IndexSearcher) s).getIndexReader().numDocs(); 
+    } 
+    return cnt;
   }
 
   /**
@@ -72,6 +98,7 @@
 
     int n = min * 2;	// double # retrieved
     TopDocs topDocs = (sort == null) ? searcher.search(weight, filter, n) : searcher.search(weight,
filter, n, sort);
+    
     length = topDocs.totalHits;
     ScoreDoc[] scoreDocs = topDocs.scoreDocs;
 
@@ -81,11 +108,36 @@
       scoreNorm = 1.0f / topDocs.getMaxScore();
     }
 
+    int start = hitDocs.size() - nDeletedHits;
+
+    // any new deletions?
+    int nDels2 = countDeletions(searcher);
+    debugCheckedForDeletions = false;
+    if (nDeletions < 0 || nDels2 > nDeletions) { 
+      // either we cannot count deletions, or some "previously valid hits" might have been
deleted, so find exact start point
+      nDeletedHits = 0;
+      debugCheckedForDeletions = true;
+      int i2 = 0;
+      for (int i1=0; i1<hitDocs.size() && i2<scoreDocs.length; i1++) {
+        int id1 = ((HitDoc)hitDocs.get(i1)).id;
+        int id2 = scoreDocs[i2].doc;
+        if (id1 == id2) {
+          i2++;
+        } else {
+          nDeletedHits ++;
+        }
+      }
+      start = i2;
+    }
+
     int end = scoreDocs.length < length ? scoreDocs.length : length;
-    for (int i = hitDocs.size(); i < end; i++) {
+    length += nDeletedHits;
+    for (int i = start; i < end; i++) {
       hitDocs.addElement(new HitDoc(scoreDocs[i].score * scoreNorm,
                                     scoreDocs[i].doc));
     }
+    
+    nDeletions = nDels2;
   }
 
   /** Returns the total number of hits available in this set. */
@@ -146,7 +198,7 @@
   }
 
   private final HitDoc hitDoc(int n) throws IOException {
-    if (n >= length) {
+    if (n >= lengthAtStart) {
       throw new IndexOutOfBoundsException("Not a valid hit number: " + n);
     }
 
@@ -154,6 +206,10 @@
       getMoreDocs(n);
     }
 
+    if (n >= length) {
+      throw new ConcurrentModificationException("Not a valid hit number: " + n);
+    }
+    
     return (HitDoc) hitDocs.elementAt(n);
   }
 

Added: lucene/java/trunk/src/test/org/apache/lucene/search/TestSearchHitsWithDeletions.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestSearchHitsWithDeletions.java?rev=606614&view=auto
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestSearchHitsWithDeletions.java (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestSearchHitsWithDeletions.java Sun
Dec 23 12:50:29 2007
@@ -0,0 +1,180 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ConcurrentModificationException;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+
+/**
+ * Test Hits searches with interleaved deletions.
+ * 
+ * See {@link http://issues.apache.org/jira/browse/LUCENE-1096}.
+ */
+public class TestSearchHitsWithDeletions extends TestCase {
+
+  private static boolean VERBOSE = false;  
+  private static final String TEXT_FIELD = "text";
+  private static final int N = 16100;
+
+  private static Directory directory;
+
+  public void setUp() throws Exception {
+    // Create an index writer.
+    directory = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
+    for (int i=0; i<N; i++) {
+      writer.addDocument(createDocument(i));
+    }
+    writer.optimize();
+    writer.close();
+  }
+
+  /**
+   * Deletions during search should not alter previously retrieved hits.
+   */
+  public void testSearchHitsDeleteAll() throws Exception {
+    doTestSearchHitsDeleteEvery(1,false);
+  }
+  
+  /**
+   * Deletions during search should not alter previously retrieved hits.
+   */
+  public void testSearchHitsDeleteEvery2ndHit() throws Exception {
+    doTestSearchHitsDeleteEvery(2,false);
+  }
+
+  /**
+   * Deletions during search should not alter previously retrieved hits.
+   */
+  public void testSearchHitsDeleteEvery4thHit() throws Exception {
+    doTestSearchHitsDeleteEvery(4,false);
+  }
+
+  /**
+   * Deletions during search should not alter previously retrieved hits.
+   */
+  public void testSearchHitsDeleteEvery8thHit() throws Exception {
+    doTestSearchHitsDeleteEvery(8,false);
+  }
+
+  /**
+   * Deletions during search should not alter previously retrieved hits.
+   */
+  public void testSearchHitsDeleteEvery90thHit() throws Exception {
+    doTestSearchHitsDeleteEvery(90,false);
+  }
+
+  /**
+   * Deletions during search should not alter previously retrieved hits,
+   * and deletions that affect total number of hits should throw the 
+   * correct exception when trying to fetch "too many".
+   */
+  public void testSearchHitsDeleteEvery8thHitAndInAdvance() throws Exception {
+    doTestSearchHitsDeleteEvery(8,true);
+  }
+
+  /**
+   * Verify that ok also with no deletions at all.
+   */
+  public void testSearchHitsNoDeletes() throws Exception {
+    doTestSearchHitsDeleteEvery(N+100,false);
+  }
+
+  /**
+   * Deletions that affect total number of hits should throw the 
+   * correct exception when trying to fetch "too many".
+   */
+  public void testSearchHitsDeleteInAdvance() throws Exception {
+    doTestSearchHitsDeleteEvery(N+100,true);
+  }
+
+  /**
+   * Intermittent deletions during search, should not alter previously retrieved hits.
+   * (Using a debugger to verify that the check in Hits is performed only  
+   */
+  public void testSearchHitsDeleteIntermittent() throws Exception {
+    doTestSearchHitsDeleteEvery(-1,false);
+  }
+
+  
+  private void doTestSearchHitsDeleteEvery(int k, boolean deleteInFront) throws Exception
{
+    boolean intermittent = k<0;
+    log("Test search hits with "+(intermittent ? "intermittent deletions." : "deletions of
every "+k+" hit."));
+    IndexSearcher searcher = new IndexSearcher(directory);
+    IndexReader reader = searcher.getIndexReader();
+    Query q = new TermQuery(new Term(TEXT_FIELD,"text")); // matching all docs
+    Hits hits = searcher.search(q);
+    log("Got "+hits.length()+" results");
+    assertEquals("must match all "+N+" docs, not only "+hits.length()+" docs!",N,hits.length());
+    if (deleteInFront) {
+      log("deleting hits that was not yet retrieved!");
+      reader.deleteDocument(reader.maxDoc()-1);
+      reader.deleteDocument(reader.maxDoc()-2);
+      reader.deleteDocument(reader.maxDoc()-3);
+    }
+    try {
+      for (int i = 0; i < hits.length(); i++) {
+        int id = hits.id(i);
+        assertEquals("Hit "+i+" has doc id "+hits.id(i)+" instead of "+i,i,hits.id(i));
+        if ((intermittent && (i==50 || i==250 || i==950)) || //100-yes, 200-no, 400-yes,
800-no, 1600-yes 
+            (!intermittent && (k<2 || (i>0 && i%k==0)))) {
+          Document doc = hits.doc(id);
+          log("Deleting hit "+i+" - doc "+doc+" with id "+id);
+          reader.deleteDocument(id);
+        }
+        if (intermittent) {
+          // check internal behavior of Hits (go 50 ahead of getMoreDocs points because the
deletions cause to use more of the available hits)
+          if (i==150 || i==450 || i==1650) {
+            assertTrue("Hit "+i+": hits should have checked for deletions in last call to
getMoreDocs()",hits.debugCheckedForDeletions);
+          } else if (i==50 || i==250 || i==850) {
+            assertFalse("Hit "+i+": hits should have NOT checked for deletions in last call
to getMoreDocs()",hits.debugCheckedForDeletions);
+          }
+        }
+      }
+    } catch (ConcurrentModificationException e) {
+      // this is the only valid exception, and only when deletng in front.
+      assertTrue(e.getMessage()+" not expected unless deleting hits that were not yet seen!",deleteInFront);
+    }
+    searcher.close();
+  }
+
+  private static Document createDocument(int id) {
+    Document doc = new Document();
+    doc.add(new Field(TEXT_FIELD, "text of document"+id, Field.Store.YES, Field.Index.TOKENIZED));
+    return doc;
+  }
+
+  private static void log (String s) {
+    if (VERBOSE) {
+      System.out.println(s);
+    }
+  }
+}

Propchange: lucene/java/trunk/src/test/org/apache/lucene/search/TestSearchHitsWithDeletions.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message