lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r1062889 - in /lucene/dev/trunk/lucene/src/java/org/apache/lucene/index: CompoundFileWriter.java LogMergePolicy.java MergePolicy.java SegmentMerger.java codecs/MergeState.java codecs/TermsConsumer.java
Date Mon, 24 Jan 2011 17:26:18 GMT
Author: mikemccand
Date: Mon Jan 24 17:26:17 2011
New Revision: 1062889

URL: http://svn.apache.org/viewvc?rev=1062889&view=rev
Log:
LUCENE-2382: check for aborted merge when merging postings

Modified:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MergePolicy.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/MergeState.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java?rev=1062889&r1=1062888&r2=1062889&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java Mon Jan
24 17:26:17 2011
@@ -17,16 +17,16 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.LinkedList;
+
+import org.apache.lucene.index.codecs.MergeState;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.IOUtils;
 
-import java.util.LinkedList;
-import java.util.HashSet;
-
-import java.io.IOException;
-
 /**
  * Combines multiple files into a single compound file.
  * The file format:<br>
@@ -76,7 +76,7 @@ final class CompoundFileWriter {
     private HashSet<String> ids;
     private LinkedList<FileEntry> entries;
     private boolean merged = false;
-    private SegmentMerger.CheckAbort checkAbort;
+    private MergeState.CheckAbort checkAbort;
 
     /** Create the compound stream in the specified file. The file name is the
      *  entire name (no extensions are added).
@@ -86,7 +86,7 @@ final class CompoundFileWriter {
       this(dir, name, null);
     }
 
-    CompoundFileWriter(Directory dir, String name, SegmentMerger.CheckAbort checkAbort) {
+    CompoundFileWriter(Directory dir, String name, MergeState.CheckAbort checkAbort) {
         if (dir == null)
             throw new NullPointerException("directory cannot be null");
         if (name == null)

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1062889&r1=1062888&r2=1062889&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Mon Jan 24
17:26:17 2011
@@ -481,6 +481,7 @@ public abstract class LogMergePolicy ext
       if (size < 1)
         size = 1;
       levels[i] = (float) Math.log(size)/norm;
+      message("seg " + info.name + " level=" + levels[i]);
     }
 
     final float levelFloor;

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MergePolicy.java?rev=1062889&r1=1062888&r2=1062889&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MergePolicy.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MergePolicy.java Mon Jan 24 17:26:17
2011
@@ -110,7 +110,7 @@ public abstract class MergePolicy implem
       return aborted;
     }
 
-    synchronized void checkAborted(Directory dir) throws MergeAbortedException {
+    public synchronized void checkAborted(Directory dir) throws MergeAbortedException {
       if (aborted) {
         throw new MergeAbortedException("merge is aborted: " + segString(dir));
       }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1062889&r1=1062888&r2=1062889&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java Mon Jan 24
17:26:17 2011
@@ -59,7 +59,7 @@ final class SegmentMerger {
   
   private int mergedDocs;
 
-  private final CheckAbort checkAbort;
+  private final MergeState.CheckAbort checkAbort;
 
   /** Maximum number of contiguous documents to bulk-copy
       when merging stored fields */
@@ -78,9 +78,9 @@ final class SegmentMerger {
     this.fieldInfos = fieldInfos;
     segment = name;
     if (merge != null) {
-      checkAbort = new CheckAbort(merge, directory);
+      checkAbort = new MergeState.CheckAbort(merge, directory);
     } else {
-      checkAbort = new CheckAbort(null, null) {
+      checkAbort = new MergeState.CheckAbort(null, null) {
         @Override
         public void work(double units) throws MergeAbortedException {
           // do nothing
@@ -508,6 +508,7 @@ final class SegmentMerger {
     mergeState.hasPayloadProcessorProvider = payloadProcessorProvider != null;
     mergeState.dirPayloadProcessor = new PayloadProcessorProvider.DirPayloadProcessor[mergeState.readerCount];
     mergeState.currentPayloadProcessor = new PayloadProcessorProvider.PayloadProcessor[mergeState.readerCount];
+    mergeState.checkAbort = checkAbort;
 
     docBase = 0;
     int inputDocBase = 0;
@@ -612,31 +613,4 @@ final class SegmentMerger {
       }
     }
   }
-
-  static class CheckAbort {
-    private double workCount;
-    private MergePolicy.OneMerge merge;
-    private Directory dir;
-    public CheckAbort(MergePolicy.OneMerge merge, Directory dir) {
-      this.merge = merge;
-      this.dir = dir;
-    }
-
-    /**
-     * Records the fact that roughly units amount of work
-     * have been done since this method was last called.
-     * When adding time-consuming code into SegmentMerger,
-     * you should test different values for units to ensure
-     * that the time in between calls to merge.checkAborted
-     * is up to ~ 1 second.
-     */
-    public void work(double units) throws MergePolicy.MergeAbortedException {
-      workCount += units;
-      if (workCount >= 10000.0) {
-        merge.checkAborted(dir);
-        workCount = 0;
-      }
-    }
-  }
-  
 }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/MergeState.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/MergeState.java?rev=1062889&r1=1062888&r2=1062889&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/MergeState.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/MergeState.java Mon Jan
24 17:26:17 2011
@@ -17,13 +17,16 @@ package org.apache.lucene.index.codecs;
  * limitations under the License.
  */
 
+import java.util.List;
+
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MergePolicy;
 import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
 import org.apache.lucene.index.PayloadProcessorProvider.PayloadProcessor;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Bits;
-import java.util.List;
 
 /** Holds common state used during segment merging
  *
@@ -37,6 +40,7 @@ public class MergeState {
   public int[] docBase;                           // New docID base per reader
   public int mergedDocCount;                      // Total # merged docs
   public Bits multiDeletedDocs;
+  public CheckAbort checkAbort;
 
   // Updated per field;
   public FieldInfo fieldInfo;
@@ -45,5 +49,30 @@ public class MergeState {
   public boolean hasPayloadProcessorProvider;
   public DirPayloadProcessor[] dirPayloadProcessor;
   public PayloadProcessor[] currentPayloadProcessor;
-  
+
+  public static class CheckAbort {
+    private double workCount;
+    private MergePolicy.OneMerge merge;
+    private Directory dir;
+    public CheckAbort(MergePolicy.OneMerge merge, Directory dir) {
+      this.merge = merge;
+      this.dir = dir;
+    }
+
+    /**
+     * Records the fact that roughly units amount of work
+     * have been done since this method was last called.
+     * When adding time-consuming code into SegmentMerger,
+     * you should test different values for units to ensure
+     * that the time in between calls to merge.checkAborted
+     * is up to ~ 1 second.
+     */
+    public void work(double units) throws MergePolicy.MergeAbortedException {
+      workCount += units;
+      if (workCount >= 10000.0) {
+        merge.checkAborted(dir);
+        workCount = 0;
+      }
+    }
+  }
 }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java?rev=1062889&r1=1062888&r2=1062889&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java Mon
Jan 24 17:26:17 2011
@@ -56,6 +56,7 @@ public abstract class TermsConsumer {
     BytesRef term;
     assert termsEnum != null;
     long sumTotalTermFreq = 0;
+    long sumDF = 0;
 
     if (mergeState.fieldInfo.omitTermFreqAndPositions) {
       if (docsEnum == null) {
@@ -73,6 +74,11 @@ public abstract class TermsConsumer {
           final TermStats stats = postingsConsumer.merge(mergeState, docsEnum);
           if (stats.docFreq > 0) {
             finishTerm(term, stats);
+            sumDF += stats.docFreq;
+            if (sumDF > 60000) {
+              mergeState.checkAbort.work(sumDF/5.0);
+              sumDF = 0;
+            }
           }
         }
       }
@@ -99,6 +105,11 @@ public abstract class TermsConsumer {
           if (stats.docFreq > 0) {
             finishTerm(term, stats);
             sumTotalTermFreq += stats.totalTermFreq;
+            sumDF += stats.docFreq;
+            if (sumDF > 60000) {
+              mergeState.checkAbort.work(sumDF/5.0);
+              sumDF = 0;
+            }
           }
         }
       }



Mime
View raw message