lucene-java-commits mailing list archives

From yo...@apache.org
Subject svn commit: r432125 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/index/IndexWriter.java
Date Thu, 17 Aug 2006 02:52:22 GMT
Author: yonik
Date: Wed Aug 16 19:52:21 2006
New Revision: 432125

URL: http://svn.apache.org/viewvc?rev=432125&view=rev
Log:
keep track of number of buffered documents: LUCENE-388
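
For context, the optimization in this commit replaces a per-addDocument recount of the buffered single-document segments with a maintained counter. The following is an illustrative sketch only, not the actual IndexWriter code (class and field names are hypothetical; the real change is in the IndexWriter.java diff below):

    import java.util.ArrayList;
    import java.util.List;

    class BufferedDocCounterSketch {
      private final List<Integer> segmentDocCounts = new ArrayList<Integer>();
      private int singleDocSegmentsCount = 0;   // trailing buffered 1-doc segments
      private final int maxBufferedDocs = 10;   // stand-in for IndexWriter's setting

      void addDocument() {
        segmentDocCounts.add(1);                // each added doc starts as a 1-doc segment
        singleDocSegmentsCount++;               // O(1) bookkeeping, no scan of the segment list
        maybeMergeSegments();
      }

      private void maybeMergeSegments() {
        if (singleDocSegmentsCount >= maxBufferedDocs) {
          // ... merge the buffered segments into one larger segment here ...
          singleDocSegmentsCount = 0;           // counter resets once they are merged away
        }
      }
    }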

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=432125&r1=432124&r2=432125&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Wed Aug 16 19:52:21 2006
@@ -121,6 +121,10 @@
      during segment merges (e.g. during indexing or optimizing), thus improving
      performance . (Michael Busch via Otis Gospodnetic)
 
+  4. LUCENE-388: Improve indexing performance when maxBufferedDocs is large by
+     keeping a count of buffered documents rather than counting after each
+     document addition.  (Doron Cohen, Paul Smith, Yonik Seeley)
+
 Release 2.0.0 2006-05-26
 
 API Changes
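
The CHANGES entry above describes how merge-candidate selection benefits from the counter. A small self-contained sketch, assuming a plain list of per-segment document counts, of how a known count of trailing one-document segments lets the candidate scan skip over them (names are illustrative; the actual logic is in maybeMergeSegments() in the diff that follows):

    import java.util.List;

    class MergeScanSketch {
      /**
       * Returns the index of the first segment to merge, or -1 if no merge is
       * needed. docCounts holds per-segment document counts, oldest first; the
       * last singleDocSegmentsCount entries are known to be 1-doc segments.
       */
      static int findMergeStart(List<Integer> docCounts,
                                int singleDocSegmentsCount,
                                int targetMergeDocs) {
        // Start below the trailing 1-doc segments; they are merge candidates for sure.
        int minSegment = docCounts.size() - singleDocSegmentsCount;
        int mergeDocs = singleDocSegmentsCount;
        while (--minSegment >= 0) {
          int docCount = docCounts.get(minSegment);
          if (docCount >= targetMergeDocs)
            break;                              // segment already large enough; stop scanning
          mergeDocs += docCount;
        }
        return (mergeDocs >= targetMergeDocs) ? minSegment + 1 : -1;
      }
    }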

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java?rev=432125&r1=432124&r2=432125&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java Wed Aug 16 19:52:21 2006
@@ -110,6 +110,7 @@
   private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
   private final Directory ramDirectory = new RAMDirectory(); // for temp segs
 
+  private int singleDocSegmentsCount = 0; // for speeding decision on merge candidates
   private Lock writeLock;
 
   private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
@@ -119,7 +120,7 @@
    * may also cause file handle problems.
    */
   private boolean useCompoundFile = true;
-  
+
   private boolean closeDir;
 
   /** Get the current setting of whether to use the compound file format.
@@ -245,7 +246,7 @@
        throws IOException {
     this(d, a, create, false);
   }
-  
+
   private IndexWriter(Directory d, Analyzer a, final boolean create, boolean closeDir)
     throws IOException {
       this.closeDir = closeDir;
@@ -303,7 +304,7 @@
   public void setMaxFieldLength(int maxFieldLength) {
     this.maxFieldLength = maxFieldLength;
   }
-  
+
   /**
    * @see #setMaxFieldLength
    */
@@ -318,7 +319,7 @@
    * the number of files open in a FSDirectory.
    *
    * <p> The default value is 10.
-   * 
+   *
    * @throws IllegalArgumentException if maxBufferedDocs is smaller than 2
    */
   public void setMaxBufferedDocs(int maxBufferedDocs) {
@@ -433,6 +434,7 @@
   /** Flushes all changes to an index and closes all associated files. */
   public synchronized void close() throws IOException {
     flushRamSegments();
+    // testInvariants();
     ramDirectory.close();
     if (writeLock != null) {
       writeLock.release();                          // release write lock
@@ -509,14 +511,16 @@
     dw.addDocument(segmentName, doc);
     synchronized (this) {
       segmentInfos.addElement(new SegmentInfo(segmentName, 1, ramDirectory));
+      singleDocSegmentsCount++;
       maybeMergeSegments();
     }
+    // testInvariants();
   }
 
   final int getSegmentsCounter(){
     return segmentInfos.counter;
   }
-  
+
   private final synchronized String newSegmentName() {
     return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX);
   }
@@ -575,6 +579,7 @@
       int minSegment = segmentInfos.size() - mergeFactor;
       mergeSegments(minSegment < 0 ? 0 : minSegment);
     }
+    // testInvariants();
   }
 
   /** Merges all segments from an array of indexes into this index.
@@ -599,7 +604,7 @@
         segmentInfos.addElement(sis.info(j));	  // add each info
       }
     }
-    
+
     // merge newly added segments in log(n) passes
     while (segmentInfos.size() > start+mergeFactor) {
       for (int base = start; base < segmentInfos.size(); base++) {
@@ -610,6 +615,7 @@
     }
 
     optimize();					  // final cleanup
+    // testInvariants();
   }
 
   /** Merges the provided indexes into this index.
@@ -631,7 +637,7 @@
         merger.add(sReader);
         segmentsToDelete.addElement(sReader);   // queue segment for deletion
     }
-      
+
     for (int i = 0; i < readers.length; i++)      // add new indexes
       merger.add(readers[i]);
 
@@ -639,7 +645,7 @@
 
     segmentInfos.setSize(0);                      // pop old infos & add new
     segmentInfos.addElement(new SegmentInfo(mergedName, docCount, directory));
-    
+
     if(sReader != null)
         sReader.close();
 
@@ -651,7 +657,7 @@
 	  }
 	}.run();
     }
-    
+
     deleteSegments(segmentsToDelete);  // delete now-unused segments
 
     if (useCompoundFile) {
@@ -666,9 +672,11 @@
         }.run();
       }
 
-      // delete now unused files of segment 
-      deleteFiles(filesToDelete);   
+      // delete now unused files of segment
+      deleteFiles(filesToDelete);
     }
+
+    // testInvariants();
   }
 
   /** Merges all RAM-resident segments. */
@@ -694,8 +702,8 @@
     long targetMergeDocs = minMergeDocs;
     while (targetMergeDocs <= maxMergeDocs) {
       // find segments smaller than current target size
-      int minSegment = segmentInfos.size();
-      int mergeDocs = 0;
+      int minSegment = segmentInfos.size() - singleDocSegmentsCount; // top 1-doc segments are taken for sure
+      int mergeDocs = singleDocSegmentsCount;
       while (--minSegment >= 0) {
         SegmentInfo si = segmentInfos.info(minSegment);
         if (si.docCount >= targetMergeDocs)
@@ -703,10 +711,12 @@
         mergeDocs += si.docCount;
       }
 
-      if (mergeDocs >= targetMergeDocs)		  // found a merge to do
+      if (mergeDocs >= targetMergeDocs)	{	  // found a merge to do
         mergeSegments(minSegment+1);
-      else
+        singleDocSegmentsCount = 0;
+      } else {
         break;
+      }
 
       targetMergeDocs *= mergeFactor;		  // increase target size
     }
@@ -780,6 +790,50 @@
       deleteFiles(filesToDelete);   
     }
   }
+
+  /***
+  private synchronized void testInvariants() {
+    // index segments should decrease in size
+    int maxSegLevel = 0;
+    for (int i=segmentInfos.size()-1; i>=0; i--) {
+      SegmentInfo si = segmentInfos.info(i);
+      int segLevel = (si.docCount)/minMergeDocs;
+      if (segLevel < maxSegLevel) {
+
+        throw new RuntimeException("Segment #" + i + " is too small. " + segInfo());
+      }
+      maxSegLevel = Math.max(maxSegLevel,segLevel);
+    }
+
+    // check if merges needed
+    long targetMergeDocs = minMergeDocs;
+    int minSegment = segmentInfos.size();
+
+    while (targetMergeDocs <= maxMergeDocs && minSegment>=0) {
+      int mergeDocs = 0;
+      while (--minSegment >= 0) {
+        SegmentInfo si = segmentInfos.info(minSegment);
+        if (si.docCount >= targetMergeDocs) break;
+        mergeDocs += si.docCount;
+      }
+
+      if (mergeDocs >= targetMergeDocs) {
+        throw new RuntimeException("Merge needed at level "+targetMergeDocs + " :"+segInfo());
+      }
+
+      targetMergeDocs *= mergeFactor;		  // increase target size
+    }
+  }
+
+  private String segInfo() {
+    StringBuffer sb = new StringBuffer("minMergeDocs="+minMergeDocs+" singleDocSegmentsCount="+singleDocSegmentsCount+" segsizes:");
+    for (int i=0; i<segmentInfos.size(); i++) {
+      sb.append(segmentInfos.info(i).docCount);
+      sb.append(",");
+    }
+    return sb.toString();
+  }
+  ***/
 
   /*
    * Some operating systems (e.g. Windows) don't permit a file to be deleted


