lucene-java-commits mailing list archives

From: mikemcc...@apache.org
Subject: svn commit: r506964 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/index/ src/test/org/apache/lucene/index/
Date: Tue, 13 Feb 2007 10:43:09 GMT
Author: mikemccand
Date: Tue Feb 13 02:43:08 2007
New Revision: 506964

URL: http://svn.apache.org/viewvc?view=rev&rev=506964
Log:
LUCENE-565: merge NewIndexModifier back into IndexWriter

Added:
    lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterDelete.java
Removed:
    lucene/java/trunk/src/java/org/apache/lucene/index/NewIndexModifier.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestNewIndexModifierDelete.java
Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/index/IndexModifier.java
    lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?view=diff&rev=506964&r1=506963&r2=506964
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Tue Feb 13 02:43:08 2007
@@ -102,10 +102,10 @@
     their passing unit tests.
     (Otis Gospodnetic)
 
-13. LUCENE-565: Added NewIndexModifier (subclass of IndexWriter) to
-    more efficiently handle updating documents (the "delete then add"
-    use case).  This is intended to be an eventual replacement for the
-    existing IndexModifier.  Added IndexWriter.flush() (renamed from
+13. LUCENE-565: Added methods to IndexWriter to more efficiently
+    handle updating documents (the "delete then add" use case).  This
+    is intended to be an eventual replacement for the existing
+    IndexModifier.  Added IndexWriter.flush() (renamed from
     flushRamSegments()) to flush all pending updates (held in RAM), to
     the Directory.  (Ning Li via Mike McCandless)
 

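A minimal usage sketch of the new IndexWriter delete/update API described in the CHANGES entry above. This sketch is not part of the commit; the class name, field names, and literal values are illustrative only, mirroring the style of the new TestIndexWriterDelete test added below.

    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.store.RAMDirectory;

    // Illustrative class name; not part of this commit.
    public class UpdateByTermSketch {
      public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);

        Document doc = new Document();
        doc.add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));
        doc.add(new Field("city", "Amsterdam", Field.Store.YES, Field.Index.TOKENIZED));
        writer.addDocument(doc);

        // Update = delete all docs matching the term, then add the new doc,
        // atomically as seen by readers of the same index:
        Document newDoc = new Document();
        newDoc.add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));
        newDoc.add(new Field("city", "Venice", Field.Store.YES, Field.Index.TOKENIZED));
        writer.updateDocument(new Term("id", "1"), newDoc);

        // Plain delete-by-term is also available now:
        writer.deleteDocuments(new Term("city", "Venice"));

        writer.flush();  // push buffered adds and deletes to the Directory
        writer.close();
        dir.close();
      }
    }
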
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexModifier.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/IndexModifier.java?view=diff&rev=506964&r1=506963&r2=506964
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexModifier.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexModifier.java Tue Feb 13 02:43:08 2007
@@ -27,6 +27,11 @@
 import java.io.PrintStream;
 
 /**
+ * <p>[Note that as of <b>2.1</b>, all but one of the
+ * methods in this class are available via {@link
+ * IndexWriter}.  The one method that is not available is
+ * {@link #deleteDocument(int)}.]</p>
+ *
  * A class to modify an index, i.e. to delete and add documents. This
  * class hides {@link IndexReader} and {@link IndexWriter} so that you
  * do not need to care about implementation details such as that adding

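As a hedged illustration of the note added above (names and the document number are made up; only the Term-based calls are new in 2.1): deleting by internal document number still goes through IndexModifier or IndexReader, while Term-based deletes can now be issued directly on IndexWriter.

    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.index.IndexModifier;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.store.Directory;

    // Illustrative helper; not part of this commit.
    class DeleteSketch {
      static void deleteBoth(Directory dir) throws Exception {
        // Deleting by document number: still IndexModifier (or IndexReader) only.
        IndexModifier modifier = new IndexModifier(dir, new WhitespaceAnalyzer(), false);
        modifier.deleteDocument(0);
        modifier.close();

        // Deleting by Term: as of 2.1 this can be done directly on IndexWriter.
        IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
        writer.deleteDocuments(new Term("id", "1"));
        writer.close();
      }
    }
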
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java?view=diff&rev=506964&r1=506963&r2=506964
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java Tue Feb 13 02:43:08 2007
@@ -32,6 +32,9 @@
 import java.io.PrintStream;
 import java.util.Vector;
 import java.util.HashSet;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map.Entry;
 
 /**
   An IndexWriter creates and maintains an index.
@@ -58,7 +61,16 @@
   is also thrown if an IndexReader on the same directory is used to delete documents
   from the index.</p>
   
-  @see IndexModifier IndexModifier supports the important methods of IndexWriter plus deletion
+  <p>As of <b>2.1</b>, IndexWriter can now delete documents
+  by {@link Term} (see {@link #deleteDocuments} ) and update
+  (delete then add) documents (see {@link #updateDocument}).
+  Deletes are buffered until {@link
+  #setMaxBufferedDeleteTerms} <code>Terms</code> have
+  accumulated, at which point they are flushed to the
+  index.  Note that a flush occurs when there are enough
+  buffered deletes or enough added documents, whichever
+  happens first.  When a flush occurs, both pending
+  deletes and added documents are flushed to the
+  index.</p>
   */
 
 public class IndexWriter {
@@ -84,6 +96,11 @@
   public final static int DEFAULT_MAX_BUFFERED_DOCS = 10;
 
   /**
+   * Default value is 1000. Change using {@link #setMaxBufferedDeleteTerms(int)}.
+   */
+  public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = 1000;
+
+  /**
    * Default value is {@link Integer#MAX_VALUE}. Change using {@link #setMaxMergeDocs(int)}.
    */
   public final static int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;
@@ -108,8 +125,8 @@
   private HashSet protectedSegments; // segment names that should not be deleted until commit
   private SegmentInfos rollbackSegmentInfos;      // segmentInfos we will fallback to if the commit fails
 
-  protected SegmentInfos segmentInfos = new SegmentInfos();       // the segments
-  protected SegmentInfos ramSegmentInfos = new SegmentInfos();    // the segments in ramDirectory
+  SegmentInfos segmentInfos = new SegmentInfos();       // the segments
+  SegmentInfos ramSegmentInfos = new SegmentInfos();    // the segments in ramDirectory
   private final RAMDirectory ramDirectory = new RAMDirectory(); // for temp segs
   private IndexFileDeleter deleter;
 
@@ -117,6 +134,16 @@
 
   private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
 
+  // The max number of delete terms that can be buffered before
+  // they must be flushed to disk.
+  private int maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS;
+
+  // This HashMap buffers delete terms in ram before they are applied.
+  // The key is the delete term; the value is the number of ram
+  // segments the term applies to.
+  private HashMap bufferedDeleteTerms = new HashMap();
+  private int numBufferedDeleteTerms = 0;
+
   /** Use compound file setting. Defaults to true, minimizing the number of
    * files used.  Setting this to false may improve indexing performance, but
    * may also cause file handle problems.
@@ -125,10 +152,6 @@
 
   private boolean closeDir;
 
-  protected IndexFileDeleter getDeleter() {
-    return deleter;
-  }
-
   /** Get the current setting of whether to use the compound file format.
    *  Note that this just returns the value you set with setUseCompoundFile(boolean)
    *  or the default. You cannot use this to query the status of an existing index.
@@ -440,6 +463,28 @@
     return minMergeDocs;
   }
 
+  /**
+   * <p>Determines the minimum number of delete terms required before the buffered
+   * in-memory delete terms are applied and flushed. If there are documents
+   * buffered in memory at the time, they are merged and a new segment is
+   * created.</p>
+
+   * <p>The default value is {@link #DEFAULT_MAX_BUFFERED_DELETE_TERMS}.</p>
+   * @throws IllegalArgumentException if maxBufferedDeleteTerms is smaller than 1
+   */
+  public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
+    if (maxBufferedDeleteTerms < 1)
+      throw new IllegalArgumentException("maxBufferedDeleteTerms must at least be 1");
+    this.maxBufferedDeleteTerms = maxBufferedDeleteTerms;
+  }
+
+  /**
+   * @see #setMaxBufferedDeleteTerms
+   */
+  public int getMaxBufferedDeleteTerms() {
+    return maxBufferedDeleteTerms;
+  }
+
   /** Determines how often segment indices are merged by addDocument().  With
    * smaller values, less RAM is used while indexing, and searches on
    * unoptimized indices are faster, but indexing speed is slower.  With larger
@@ -653,21 +698,73 @@
     }
   }
 
-  final SegmentInfo buildSingleDocSegment(Document doc, Analyzer analyzer)
+  SegmentInfo buildSingleDocSegment(Document doc, Analyzer analyzer)
       throws IOException {
     DocumentWriter dw = new DocumentWriter(ramDirectory, analyzer, this);
     dw.setInfoStream(infoStream);
-    String segmentName = newRAMSegmentName();
+    String segmentName = newRamSegmentName();
     dw.addDocument(segmentName, doc);
     return new SegmentInfo(segmentName, 1, ramDirectory, false, false);
   }
 
-  // for test purpose
-  final synchronized int getRAMSegmentCount() {
-    return ramSegmentInfos.size();
+  /**
+   * Deletes the document(s) containing <code>term</code>.
+   * @param term the term to identify the documents to be deleted
+   */
+  public synchronized void deleteDocuments(Term term) throws IOException {
+    bufferDeleteTerm(term);
+    maybeFlushRamSegments();
+  }
+
+  /**
+   * Deletes the document(s) containing any of the
+   * terms. All deletes are flushed at the same time.
+   * @param terms array of terms to identify the documents
+   * to be deleted
+   */
+  public synchronized void deleteDocuments(Term[] terms) throws IOException {
+    for (int i = 0; i < terms.length; i++) {
+      bufferDeleteTerm(terms[i]);
+    }
+    maybeFlushRamSegments();
+  }
+
+  /**
+   * Updates a document by first deleting the document(s)
+   * containing <code>term</code> and then adding the new
+   * document.  The delete and then add are atomic as seen
+   * by a reader on the same index (flush may happen only after
+   * the add).
+   * @param term the term to identify the document(s) to be
+   * deleted
+   * @param doc the document to be added
+   */
+  public void updateDocument(Term term, Document doc) throws IOException {
+    updateDocument(term, doc, getAnalyzer());
   }
 
-  final synchronized String newRAMSegmentName() {
+  /**
+   * Updates a document by first deleting the document(s)
+   * containing <code>term</code> and then adding the new
+   * document.  The delete and then add are atomic as seen
+   * by a reader on the same index (flush may happen only after
+   * the add).
+   * @param term the term to identify the document(s) to be
+   * deleted
+   * @param doc the document to be added
+   * @param analyzer the analyzer to use when analyzing the document
+   */
+  public void updateDocument(Term term, Document doc, Analyzer analyzer)
+      throws IOException {
+    SegmentInfo newSegmentInfo = buildSingleDocSegment(doc, analyzer);
+    synchronized (this) {
+      bufferDeleteTerm(term);
+      ramSegmentInfos.addElement(newSegmentInfo);
+      maybeFlushRamSegments();
+    }
+  }
+
+  final synchronized String newRamSegmentName() {
     return "_ram_" + Integer.toString(ramSegmentInfos.counter++, Character.MAX_RADIX);
   }
 
@@ -677,6 +774,11 @@
   }
 
   // for test purpose
+  final synchronized int getRamSegmentCount(){
+    return ramSegmentInfos.size();
+  }
+
+  // for test purpose
   final synchronized int getDocCount(int i) {
     if (i >= 0 && i < segmentInfos.size()) {
       return segmentInfos.info(i).docCount;
@@ -1228,40 +1330,32 @@
   //         counts x and y, then f(x) >= f(y).
   //      2: The number of committed segments on the same level (f(n)) <= M.
 
-  protected boolean timeToFlushRam() {
-    return ramSegmentInfos.size() >= minMergeDocs;
-  }
-
-  protected boolean anythingToFlushRam() {
-    return ramSegmentInfos.size() > 0;
-  }
-
-  // true if only buffered inserts, no buffered deletes
-  protected boolean onlyRamDocsToFlush() {
-    return true;
-  }
-
-  // whether the latest segment is the flushed merge of ram segments
-  protected void doAfterFlushRamSegments(boolean flushedRamSegments)
-      throws IOException {
+  // This is called after pending added and deleted
+  // documents have been flushed to the Directory but before
+  // the change is committed (new segments_N file written).
+  void doAfterFlush()
+    throws IOException {
   }
 
   protected final void maybeFlushRamSegments() throws IOException {
-    if (timeToFlushRam()) {
+    // A flush is triggered if enough new documents are buffered or
+    // if enough delete terms are buffered
+    if (ramSegmentInfos.size() >= minMergeDocs || numBufferedDeleteTerms >= maxBufferedDeleteTerms)
{
       flushRamSegments();
     }
   }
 
   /** Expert:  Flushes all RAM-resident segments (buffered documents), then may merge segments. */
   private final synchronized void flushRamSegments() throws IOException {
-    if (anythingToFlushRam()) {
+    if (ramSegmentInfos.size() > 0 || bufferedDeleteTerms.size() > 0) {
       mergeSegments(ramSegmentInfos, 0, ramSegmentInfos.size());
       maybeMergeSegments(minMergeDocs);
     }
   }
 
   /**
-   * Flush all in-memory buffered updates to the Directory.
+   * Flush all in-memory buffered updates (adds and deletes)
+   * to the Directory.
    * @throws IOException
    */
   public final synchronized void flush() throws IOException {
@@ -1350,7 +1444,9 @@
   private final int mergeSegments(SegmentInfos sourceSegments, int minSegment, int end)
     throws IOException {
 
-    boolean mergeFlag = end > 0;
+    // We may be called solely because there are deletes
+    // pending, in which case doMerge is false:
+    boolean doMerge = end > 0;
     final String mergedName = newSegmentName();
     SegmentMerger merger = null;
 
@@ -1366,21 +1462,21 @@
     // This is try/finally to make sure merger's readers are closed:
     try {
 
-     if (mergeFlag) {
-      if (infoStream != null) infoStream.print("merging segments");
-      merger = new SegmentMerger(this, mergedName);
-
-      for (int i = minSegment; i < end; i++) {
-        SegmentInfo si = sourceSegments.info(i);
-        if (infoStream != null)
-          infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
-        IndexReader reader = SegmentReader.get(si); // no need to set deleter (yet)
-        merger.add(reader);
-        if ((reader.directory() == this.directory) || // if we own the directory
-            (reader.directory() == this.ramDirectory))
-          segmentsToDelete.addElement(reader);   // queue segment for deletion
+      if (doMerge) {
+        if (infoStream != null) infoStream.print("merging segments");
+        merger = new SegmentMerger(this, mergedName);
+
+        for (int i = minSegment; i < end; i++) {
+          SegmentInfo si = sourceSegments.info(i);
+          if (infoStream != null)
+            infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
+          IndexReader reader = SegmentReader.get(si); // no need to set deleter (yet)
+          merger.add(reader);
+          if ((reader.directory() == this.directory) || // if we own the directory
+              (reader.directory() == this.ramDirectory))
+            segmentsToDelete.addElement(reader);   // queue segment for deletion
+        }
       }
-     }
 
       SegmentInfos rollback = null;
       boolean success = false;
@@ -1389,40 +1485,41 @@
       // if we hit exception when doing the merge:
       try {
 
-       if (mergeFlag) {
-        mergedDocCount = merger.merge();
+        if (doMerge) {
+          mergedDocCount = merger.merge();
 
-        if (infoStream != null) {
-          infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)");
-        }
+          if (infoStream != null) {
+            infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)");
+          }
 
-        newSegment = new SegmentInfo(mergedName, mergedDocCount,
-                                     directory, false, true);
-       }
+          newSegment = new SegmentInfo(mergedName, mergedDocCount,
+                                       directory, false, true);
+        }
 
         if (!inTransaction
-            && (sourceSegments != ramSegmentInfos || !onlyRamDocsToFlush())) {
+            && (sourceSegments != ramSegmentInfos || bufferedDeleteTerms.size() > 0)) {
           // Now save the SegmentInfo instances that
           // we are replacing:
           rollback = (SegmentInfos) segmentInfos.clone();
         }
 
-       if (mergeFlag) {
-        if (sourceSegments == ramSegmentInfos) {
-          segmentInfos.addElement(newSegment);
-        } else {
-          for (int i = end-1; i > minSegment; i--)     // remove old infos & add new
-            sourceSegments.remove(i);
+        if (doMerge) {
+          if (sourceSegments == ramSegmentInfos) {
+            segmentInfos.addElement(newSegment);
+          } else {
+            for (int i = end-1; i > minSegment; i--)     // remove old infos & add new
+              sourceSegments.remove(i);
 
-          segmentInfos.set(minSegment, newSegment);
+            segmentInfos.set(minSegment, newSegment);
+          }
         }
-       }
 
         if (sourceSegments == ramSegmentInfos) {
           // Should not be necessary: no prior commit should
           // have left pending files, so just defensive:
           deleter.clearPendingFiles();
-          doAfterFlushRamSegments(mergeFlag);
+          maybeApplyDeletes(doMerge);
+          doAfterFlush();
         }
 
         if (!inTransaction) {
@@ -1446,7 +1543,7 @@
 
           // Must rollback so our state matches index:
 
-          if (sourceSegments == ramSegmentInfos && onlyRamDocsToFlush()) {
+          if (sourceSegments == ramSegmentInfos && 0 == bufferedDeleteTerms.size()) {
             // Simple case: newSegment may or may not have
             // been added to the end of our segment infos,
             // so just check & remove if so:
@@ -1476,21 +1573,21 @@
       }
     } finally {
       // close readers before we attempt to delete now-obsolete segments
-      if (mergeFlag) merger.closeReaders();
+      if (doMerge) merger.closeReaders();
     }
 
     if (!inTransaction) {
       // Attempt to delete all files we just obsoleted:
       deleter.deleteFile(segmentsInfosFileName);    // delete old segments_N file
       deleter.deleteSegments(segmentsToDelete);     // delete now-unused segments
-      // including the old del files
+      // Includes the old del files
       deleter.commitPendingFiles();
     } else {
       deleter.addPendingFile(segmentsInfosFileName);    // delete old segments_N file
       deleter.deleteSegments(segmentsToDelete, protectedSegments);     // delete now-unused segments
     }
 
-    if (useCompoundFile && mergeFlag) {
+    if (useCompoundFile && doMerge) {
 
       segmentsInfosFileName = nextSegmentsFileName;
       nextSegmentsFileName = segmentInfos.getNextSegmentFileName();
@@ -1531,6 +1628,58 @@
     return mergedDocCount;
   }
 
+  // Called during flush to apply any buffered deletes.  If
+  // doMerge is true then a new segment was just created and
+  // flushed from the ram segments.
+  private final void maybeApplyDeletes(boolean doMerge) throws IOException {
+
+    if (bufferedDeleteTerms.size() > 0) {
+      if (infoStream != null)
+        infoStream.println("flush " + numBufferedDeleteTerms + " buffered deleted terms on
"
+                           + segmentInfos.size() + " segments.");
+
+      if (doMerge) {
+        IndexReader reader = null;
+        try {
+          reader = SegmentReader.get(segmentInfos.info(segmentInfos.size() - 1));
+          reader.setDeleter(deleter);
+
+          // Apply delete terms to the segment just flushed from ram;
+          // apply them selectively so that a delete term is only applied to
+          // the documents buffered before it, not those buffered after it.
+          applyDeletesSelectively(bufferedDeleteTerms, reader);
+        } finally {
+          if (reader != null)
+            reader.close();
+        }
+      }
+
+      int infosEnd = segmentInfos.size();
+      if (doMerge) {
+        infosEnd--;
+      }
+
+      for (int i = 0; i < infosEnd; i++) {
+        IndexReader reader = null;
+        try {
+          reader = SegmentReader.get(segmentInfos.info(i));
+          reader.setDeleter(deleter);
+
+          // Apply delete terms to disk segments
+          // except the one just flushed from ram.
+          applyDeletes(bufferedDeleteTerms, reader);
+        } finally {
+          if (reader != null)
+            reader.close();
+        }
+      }
+
+      // Clean up bufferedDeleteTerms.
+      bufferedDeleteTerms.clear();
+      numBufferedDeleteTerms = 0;
+    }
+  }
+
   private final boolean checkNonDecreasingLevels(int start) {
     int lowerBound = -1;
     int upperBound = minMergeDocs;
@@ -1547,5 +1696,84 @@
       }
     }
     return true;
+  }
+
+  // For test purposes.
+  final synchronized int getBufferedDeleteTermsSize() {
+    return bufferedDeleteTerms.size();
+  }
+
+  // For test purposes.
+  final synchronized int getNumBufferedDeleteTerms() {
+    return numBufferedDeleteTerms;
+  }
+
+  // Number of ram segments a delete term applies to.
+  private class Num {
+    private int num;
+
+    Num(int num) {
+      this.num = num;
+    }
+
+    int getNum() {
+      return num;
+    }
+
+    void setNum(int num) {
+      this.num = num;
+    }
+  }
+
+  // Buffer a term in bufferedDeleteTerms, which records the
+  // current number of documents buffered in ram so that the
+  // delete term will be applied to those ram segments as
+  // well as the disk segments.
+  private void bufferDeleteTerm(Term term) {
+    Num num = (Num) bufferedDeleteTerms.get(term);
+    if (num == null) {
+      bufferedDeleteTerms.put(term, new Num(ramSegmentInfos.size()));
+    } else {
+      num.setNum(ramSegmentInfos.size());
+    }
+    numBufferedDeleteTerms++;
+  }
+
+  // Apply buffered delete terms to the segment just flushed from ram;
+  // apply them selectively so that a delete term is only applied to
+  // the documents buffered before it, not those buffered after it.
+  private final void applyDeletesSelectively(HashMap deleteTerms,
+      IndexReader reader) throws IOException {
+    Iterator iter = deleteTerms.entrySet().iterator();
+    while (iter.hasNext()) {
+      Entry entry = (Entry) iter.next();
+      Term term = (Term) entry.getKey();
+
+      TermDocs docs = reader.termDocs(term);
+      if (docs != null) {
+        int num = ((Num) entry.getValue()).getNum();
+        try {
+          while (docs.next()) {
+            int doc = docs.doc();
+            if (doc >= num) {
+              break;
+            }
+            reader.deleteDocument(doc);
+          }
+        } finally {
+          docs.close();
+        }
+      }
+    }
+  }
+
+  // Apply buffered delete terms to this reader.
+  private final void applyDeletes(HashMap deleteTerms, IndexReader reader)
+      throws IOException {
+    Iterator iter = deleteTerms.entrySet().iterator();
+    while (iter.hasNext()) {
+      Entry entry = (Entry) iter.next();
+      reader.deleteDocuments((Term) entry.getKey());
+    }
   }
 }

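A small sketch (mine, not part of the patch; the thresholds and field values are arbitrary) of the buffered-delete behavior implemented above: delete terms accumulate until the setMaxBufferedDeleteTerms limit is reached, and the flush that is then triggered writes both pending adds and pending deletes to the Directory.

    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.store.RAMDirectory;

    // Illustrative class name; thresholds chosen only for demonstration.
    public class BufferedDeleteSketch {
      public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
        writer.setMaxBufferedDocs(10);        // flush once 10 documents are buffered...
        writer.setMaxBufferedDeleteTerms(2);  // ...or once 2 delete terms are buffered,
                                              // whichever limit is reached first.

        Document doc = new Document();
        doc.add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));
        writer.addDocument(doc);                      // buffered in the RAMDirectory

        writer.deleteDocuments(new Term("id", "1"));  // first delete term: still buffered
        writer.deleteDocuments(new Term("id", "2"));  // second term hits the limit, so the
                                                      // buffered add and both deletes are
                                                      // flushed to the Directory together
        writer.close();                               // close() flushes anything still pending
        dir.close();
      }
    }
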
Added: lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterDelete.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterDelete.java?view=auto&rev=506964
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterDelete.java (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterDelete.java Tue Feb 13 02:43:08 2007
@@ -0,0 +1,444 @@
+package org.apache.lucene.index;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.MockRAMDirectory;
+import org.apache.lucene.store.RAMDirectory;
+
+public class TestIndexWriterDelete extends TestCase {
+
+  // test the simple case
+  public void testSimpleCase() throws IOException {
+    String[] keywords = { "1", "2" };
+    String[] unindexed = { "Netherlands", "Italy" };
+    String[] unstored = { "Amsterdam has lots of bridges",
+        "Venice has lots of canals" };
+    String[] text = { "Amsterdam", "Venice" };
+
+    Directory dir = new RAMDirectory();
+    IndexWriter modifier = new IndexWriter(dir,
+        new WhitespaceAnalyzer(), true);
+    modifier.setUseCompoundFile(true);
+    modifier.setMaxBufferedDeleteTerms(1);
+
+    for (int i = 0; i < keywords.length; i++) {
+      Document doc = new Document();
+      doc.add(new Field("id", keywords[i], Field.Store.YES,
+          Field.Index.UN_TOKENIZED));
+      doc.add(new Field("country", unindexed[i], Field.Store.YES,
+          Field.Index.NO));
+      doc.add(new Field("contents", unstored[i], Field.Store.NO,
+          Field.Index.TOKENIZED));
+      doc
+          .add(new Field("city", text[i], Field.Store.YES,
+              Field.Index.TOKENIZED));
+      modifier.addDocument(doc);
+    }
+    modifier.optimize();
+
+    Term term = new Term("city", "Amsterdam");
+    int hitCount = getHitCount(dir, term);
+    assertEquals(1, hitCount);
+    modifier.deleteDocuments(term);
+    hitCount = getHitCount(dir, term);
+    assertEquals(0, hitCount);
+
+    modifier.close();
+  }
+
+  // test when delete terms only apply to disk segments
+  public void testNonRAMDelete() throws IOException {
+    Directory dir = new RAMDirectory();
+    IndexWriter modifier = new IndexWriter(dir,
+        new WhitespaceAnalyzer(), true);
+    modifier.setMaxBufferedDocs(2);
+    modifier.setMaxBufferedDeleteTerms(2);
+
+    int id = 0;
+    int value = 100;
+
+    for (int i = 0; i < 7; i++) {
+      addDoc(modifier, ++id, value);
+    }
+    modifier.flush();
+
+    assertEquals(0, modifier.getRamSegmentCount());
+    assertTrue(0 < modifier.getSegmentCount());
+
+    IndexReader reader = IndexReader.open(dir);
+    assertEquals(7, reader.numDocs());
+    reader.close();
+
+    modifier.deleteDocuments(new Term("value", String.valueOf(value)));
+    modifier.deleteDocuments(new Term("value", String.valueOf(value)));
+
+    reader = IndexReader.open(dir);
+    assertEquals(0, reader.numDocs());
+    reader.close();
+
+    modifier.close();
+  }
+
+  // test when delete terms only apply to ram segments
+  public void testRAMDeletes() throws IOException {
+    Directory dir = new RAMDirectory();
+    IndexWriter modifier = new IndexWriter(dir,
+        new WhitespaceAnalyzer(), true);
+    modifier.setMaxBufferedDocs(4);
+    modifier.setMaxBufferedDeleteTerms(4);
+
+    int id = 0;
+    int value = 100;
+
+    addDoc(modifier, ++id, value);
+    modifier.deleteDocuments(new Term("value", String.valueOf(value)));
+    addDoc(modifier, ++id, value);
+    modifier.deleteDocuments(new Term("value", String.valueOf(value)));
+
+    assertEquals(2, modifier.getNumBufferedDeleteTerms());
+    assertEquals(1, modifier.getBufferedDeleteTermsSize());
+
+    addDoc(modifier, ++id, value);
+    assertEquals(0, modifier.getSegmentCount());
+    modifier.flush();
+
+    IndexReader reader = IndexReader.open(dir);
+    assertEquals(1, reader.numDocs());
+
+    int hitCount = getHitCount(dir, new Term("id", String.valueOf(id)));
+    assertEquals(1, hitCount);
+    reader.close();
+
+    modifier.close();
+  }
+
+  // test when delete terms apply to both disk and ram segments
+  public void testBothDeletes() throws IOException {
+    Directory dir = new RAMDirectory();
+    IndexWriter modifier = new IndexWriter(dir,
+        new WhitespaceAnalyzer(), true);
+    modifier.setMaxBufferedDocs(100);
+    modifier.setMaxBufferedDeleteTerms(100);
+
+    int id = 0;
+    int value = 100;
+
+    for (int i = 0; i < 5; i++) {
+      addDoc(modifier, ++id, value);
+    }
+
+    value = 200;
+    for (int i = 0; i < 5; i++) {
+      addDoc(modifier, ++id, value);
+    }
+    modifier.flush();
+
+    for (int i = 0; i < 5; i++) {
+      addDoc(modifier, ++id, value);
+    }
+    modifier.deleteDocuments(new Term("value", String.valueOf(value)));
+    modifier.flush();
+
+    IndexReader reader = IndexReader.open(dir);
+    assertEquals(5, reader.numDocs());
+
+    modifier.close();
+  }
+
+  // test that batched delete terms are flushed together
+  public void testBatchDeletes() throws IOException {
+    Directory dir = new RAMDirectory();
+    IndexWriter modifier = new IndexWriter(dir,
+        new WhitespaceAnalyzer(), true);
+    modifier.setMaxBufferedDocs(2);
+    modifier.setMaxBufferedDeleteTerms(2);
+
+    int id = 0;
+    int value = 100;
+
+    for (int i = 0; i < 7; i++) {
+      addDoc(modifier, ++id, value);
+    }
+    modifier.flush();
+
+    IndexReader reader = IndexReader.open(dir);
+    assertEquals(7, reader.numDocs());
+    reader.close();
+
+    id = 0;
+    modifier.deleteDocuments(new Term("id", String.valueOf(++id)));
+    modifier.deleteDocuments(new Term("id", String.valueOf(++id)));
+
+    reader = IndexReader.open(dir);
+    assertEquals(5, reader.numDocs());
+    reader.close();
+
+    Term[] terms = new Term[3];
+    for (int i = 0; i < terms.length; i++) {
+      terms[i] = new Term("id", String.valueOf(++id));
+    }
+    modifier.deleteDocuments(terms);
+
+    reader = IndexReader.open(dir);
+    assertEquals(2, reader.numDocs());
+    reader.close();
+
+    modifier.close();
+  }
+
+  private void addDoc(IndexWriter modifier, int id, int value)
+      throws IOException {
+    Document doc = new Document();
+    doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED));
+    doc.add(new Field("id", String.valueOf(id), Field.Store.YES,
+        Field.Index.UN_TOKENIZED));
+    doc.add(new Field("value", String.valueOf(value), Field.Store.NO,
+        Field.Index.UN_TOKENIZED));
+    modifier.addDocument(doc);
+  }
+
+  private int getHitCount(Directory dir, Term term) throws IOException {
+    IndexSearcher searcher = new IndexSearcher(dir);
+    int hitCount = searcher.search(new TermQuery(term)).length();
+    searcher.close();
+    return hitCount;
+  }
+
+  public void testDeletesOnDiskFull() throws IOException {
+    testOperationsOnDiskFull(false);
+  }
+
+  public void testUpdatesOnDiskFull() throws IOException {
+    testOperationsOnDiskFull(true);
+  }
+
+  /**
+   * Make sure if modifier tries to commit but hits disk full that modifier
+   * remains consistent and usable. Similar to TestIndexReader.testDiskFull().
+   */
+  private void testOperationsOnDiskFull(boolean updates) throws IOException {
+
+    boolean debug = false;
+    Term searchTerm = new Term("content", "aaa");
+    int START_COUNT = 157;
+    int END_COUNT = 144;
+
+    // First build up a starting index:
+    RAMDirectory startDir = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(startDir, new WhitespaceAnalyzer(),
+        true);
+    for (int i = 0; i < 157; i++) {
+      Document d = new Document();
+      d.add(new Field("id", Integer.toString(i), Field.Store.YES,
+          Field.Index.UN_TOKENIZED));
+      d.add(new Field("content", "aaa " + i, Field.Store.NO,
+          Field.Index.TOKENIZED));
+      writer.addDocument(d);
+    }
+    writer.close();
+
+    long diskUsage = startDir.sizeInBytes();
+    long diskFree = diskUsage + 10;
+
+    IOException err = null;
+
+    boolean done = false;
+
+    // Iterate w/ ever increasing free disk space:
+    while (!done) {
+      MockRAMDirectory dir = new MockRAMDirectory(startDir);
+      IndexWriter modifier = new IndexWriter(dir,
+          new WhitespaceAnalyzer(), false);
+
+      modifier.setMaxBufferedDocs(1000); // use flush or close
+      modifier.setMaxBufferedDeleteTerms(1000); // use flush or close
+
+      // For each disk size, first try to commit against
+      // dir that will hit random IOExceptions & disk
+      // full; after, give it infinite disk space & turn
+      // off random IOExceptions & retry w/ same reader:
+      boolean success = false;
+
+      for (int x = 0; x < 2; x++) {
+
+        double rate = 0.1;
+        double diskRatio = ((double)diskFree) / diskUsage;
+        long thisDiskFree;
+        String testName;
+
+        if (0 == x) {
+          thisDiskFree = diskFree;
+          if (diskRatio >= 2.0) {
+            rate /= 2;
+          }
+          if (diskRatio >= 4.0) {
+            rate /= 2;
+          }
+          if (diskRatio >= 6.0) {
+            rate = 0.0;
+          }
+          if (debug) {
+            System.out.println("\ncycle: " + diskFree + " bytes");
+          }
+          testName = "disk full during reader.close() @ " + thisDiskFree
+              + " bytes";
+        } else {
+          thisDiskFree = 0;
+          rate = 0.0;
+          if (debug) {
+            System.out.println("\ncycle: same writer: unlimited disk space");
+          }
+          testName = "reader re-use after disk full";
+        }
+
+        dir.setMaxSizeInBytes(thisDiskFree);
+        dir.setRandomIOExceptionRate(rate, diskFree);
+
+        try {
+          if (0 == x) {
+            int docId = 12;
+            for (int i = 0; i < 13; i++) {
+              if (updates) {
+                Document d = new Document();
+                d.add(new Field("id", Integer.toString(i), Field.Store.YES,
+                    Field.Index.UN_TOKENIZED));
+                d.add(new Field("content", "bbb " + i, Field.Store.NO,
+                    Field.Index.TOKENIZED));
+                modifier.updateDocument(new Term("id", Integer.toString(docId)), d);
+              } else { // deletes
+                modifier.deleteDocuments(new Term("id", Integer.toString(docId)));
+                // modifier.setNorm(docId, "contents", (float)2.0);
+              }
+              docId += 12;
+            }
+          }
+          modifier.close();
+          success = true;
+          if (0 == x) {
+            done = true;
+          }
+        }
+        catch (IOException e) {
+          if (debug) {
+            System.out.println("  hit IOException: " + e);
+          }
+          err = e;
+          if (1 == x) {
+            e.printStackTrace();
+            fail(testName + " hit IOException after disk space was freed up");
+          }
+        }
+
+        // Whether we succeeded or failed, check that all
+        // un-referenced files were in fact deleted (ie,
+        // we did not create garbage). Just create a
+        // new IndexFileDeleter, have it delete
+        // unreferenced files, then verify that in fact
+        // no files were deleted:
+        String[] startFiles = dir.list();
+        SegmentInfos infos = new SegmentInfos();
+        infos.read(dir);
+        IndexFileDeleter d = new IndexFileDeleter(infos, dir);
+        d.findDeletableFiles();
+        d.deleteFiles();
+        String[] endFiles = dir.list();
+
+        Arrays.sort(startFiles);
+        Arrays.sort(endFiles);
+
+        // for(int i=0;i<startFiles.length;i++) {
+        // System.out.println(" startFiles: " + i + ": " + startFiles[i]);
+        // }
+
+        if (!Arrays.equals(startFiles, endFiles)) {
+          String successStr;
+          if (success) {
+            successStr = "success";
+          } else {
+            successStr = "IOException";
+            err.printStackTrace();
+          }
+          fail("reader.close() failed to delete unreferenced files after "
+              + successStr + " (" + diskFree + " bytes): before delete:\n    "
+              + arrayToString(startFiles) + "\n  after delete:\n    "
+              + arrayToString(endFiles));
+        }
+
+        // Finally, verify index is not corrupt, and, if
+        // we succeeded, we see all docs changed, and if
+        // we failed, we see either all docs or no docs
+        // changed (transactional semantics):
+        IndexReader newReader = null;
+        try {
+          newReader = IndexReader.open(dir);
+        }
+        catch (IOException e) {
+          e.printStackTrace();
+          fail(testName
+              + ":exception when creating IndexReader after disk full during close: "
+              + e);
+        }
+
+        IndexSearcher searcher = new IndexSearcher(newReader);
+        Hits hits = null;
+        try {
+          hits = searcher.search(new TermQuery(searchTerm));
+        }
+        catch (IOException e) {
+          e.printStackTrace();
+          fail(testName + ": exception when searching: " + e);
+        }
+        int result2 = hits.length();
+        if (success) {
+          if (result2 != END_COUNT) {
+            fail(testName
+                + ": method did not throw exception but hits.length for search on term 'aaa'
is "
+                + result2 + " instead of expected " + END_COUNT);
+          }
+        } else {
+          // On hitting exception we still may have added
+          // all docs:
+          if (result2 != START_COUNT && result2 != END_COUNT) {
+            err.printStackTrace();
+            fail(testName
+                + ": method did throw exception but hits.length for search on term 'aaa'
is "
+                + result2 + " instead of expected " + START_COUNT);
+          }
+        }
+
+        searcher.close();
+        newReader.close();
+
+        if (result2 == END_COUNT) {
+          break;
+        }
+      }
+
+      dir.close();
+
+      // Try again with 10 more bytes of free space:
+      diskFree += 10;
+    }
+  }
+
+  private String arrayToString(String[] l) {
+    String s = "";
+    for (int i = 0; i < l.length; i++) {
+      if (i > 0) {
+        s += "\n    ";
+      }
+      s += l[i];
+    }
+    return s;
+  }
+}

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java?view=diff&rev=506964&r1=506963&r2=506964
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java Tue Feb 13 02:43:08 2007
@@ -40,7 +40,7 @@
     for (int i = 0; i < 100; i++) {
       addDoc(writer);
       checkInvariants(writer);
-      if (writer.getRAMSegmentCount() + writer.getSegmentCount() >= 18) {
+      if (writer.getRamSegmentCount() + writer.getSegmentCount() >= 18) {
         noOverMerge = true;
       }
     }
@@ -178,7 +178,7 @@
     int mergeFactor = writer.getMergeFactor();
     int maxMergeDocs = writer.getMaxMergeDocs();
 
-    int ramSegmentCount = writer.getRAMSegmentCount();
+    int ramSegmentCount = writer.getRamSegmentCount();
     assertTrue(ramSegmentCount < maxBufferedDocs);
 
     int lowerBound = -1;


