lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r775489 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java src/java/org/apache/lucene/index/LogDocMergePolicy.java src/java/org/apache/lucene/index/LogMergePolicy.java
Date Sat, 16 May 2009 15:58:37 GMT
Author: mikemccand
Date: Sat May 16 15:58:36 2009
New Revision: 775489

URL: http://svn.apache.org/viewvc?rev=775489&view=rev
Log:
LUCENE-1634: add calibrateSizeByDeletes to LogMergePolicy

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java
    lucene/java/trunk/src/java/org/apache/lucene/index/LogDocMergePolicy.java
    lucene/java/trunk/src/java/org/apache/lucene/index/LogMergePolicy.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=775489&r1=775488&r2=775489&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Sat May 16 15:58:36 2009
@@ -307,6 +307,10 @@
 24. LUCENE-1494: Added FieldMaskingSpanQuery which can be used to
     cross-correlate Spans from different fields.
     (Paul Cowan and Chris Hostetter)
+
+25. LUCENE-1634: Add calibrateSizeByDeletes to LogMergePolicy, to take
+    deletions into account when considering merges.  (Yasuhiro Matsuda
+    via Mike McCandless)
     
 Optimizations
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java?rev=775489&r1=775488&r2=775489&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java Sat May 16 15:58:36 2009
@@ -36,7 +36,7 @@
     maxMergeSize = (long) (DEFAULT_MAX_MERGE_MB*1024*1024);
   }
   protected long size(SegmentInfo info) throws IOException {
-    return info.sizeInBytes();
+    return sizeBytes(info);
   }
 
   /** <p>Determines the largest segment (measured by total

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/LogDocMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/LogDocMergePolicy.java?rev=775489&r1=775488&r2=775489&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/LogDocMergePolicy.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/LogDocMergePolicy.java Sat May 16 15:58:36 2009
@@ -1,5 +1,7 @@
 package org.apache.lucene.index;
 
+import java.io.IOException;
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -34,8 +36,8 @@
     // it to Long.MAX_VALUE to disable it
     maxMergeSize = Long.MAX_VALUE;
   }
-  protected long size(SegmentInfo info) {
-    return info.docCount;
+  protected long size(SegmentInfo info) throws IOException {
+    return sizeDocs(info);
   }
 
   /** Sets the minimum size for the lowest level segments.

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=775489&r1=775488&r2=775489&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/LogMergePolicy.java Sat May 16 15:58:36 2009
@@ -20,8 +20,6 @@
 import java.io.IOException;
 import java.util.Set;
 
-import org.apache.lucene.store.Directory;
-
 /** <p>This class implements a {@link MergePolicy} that tries
  *  to merge segments into levels of exponentially
  *  increasing size, where each level has < mergeFactor
@@ -59,6 +57,9 @@
   long maxMergeSize;
   int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
 
+  /* TODO 3.0: change this default to true */
+  protected boolean calibrateSizeByDeletes = false;
+  
   private boolean useCompoundFile = true;
   private boolean useCompoundDocStore = true;
   private IndexWriter writer;
@@ -132,10 +133,40 @@
     return useCompoundDocStore;
   }
 
+  /** Sets whether the segment size should be calibrated by
+   *  the number of deletes when choosing segments for merge. */
+  public void setCalibrateSizeByDeletes(boolean calibrateSizeByDeletes) {
+    this.calibrateSizeByDeletes = calibrateSizeByDeletes;
+  }
+
+  /** Returns true if the segment size should be calibrated 
+   *  by the number of deletes when choosing segments for merge. */
+  public boolean getCalibrateSizeByDeletes() {
+    return calibrateSizeByDeletes;
+  }
+
   public void close() {}
 
   abstract protected long size(SegmentInfo info) throws IOException;
 
+  protected long sizeDocs(SegmentInfo info) throws IOException {
+    if (calibrateSizeByDeletes) {
+      return (info.docCount - (long)info.getDelCount());
+    } else {
+      return info.docCount;
+    }
+  }
+  
+  protected long sizeBytes(SegmentInfo info) throws IOException {
+    long byteSize = info.sizeInBytes();
+    if (calibrateSizeByDeletes) {
+      float delRatio = (info.docCount <= 0 ? 0.0f : ((float)info.getDelCount() / (float)info.docCount));
+      return (info.docCount <= 0 ?  byteSize : (long)((float)byteSize * (1.0f - delRatio)));
+    } else {
+      return byteSize;
+    }
+  }
+  
   private boolean isOptimized(SegmentInfos infos, IndexWriter writer, int maxNumSegments, Set segmentsToOptimize) throws IOException {
     final int numSegments = infos.size();
     int numToOptimize = 0;
@@ -321,8 +352,6 @@
     float[] levels = new float[numSegments];
     final float norm = (float) Math.log(mergeFactor);
 
-    final Directory directory = writer.getDirectory();
-
     for(int i=0;i<numSegments;i++) {
       final SegmentInfo info = infos.info(i);
       long size = size(info);
@@ -390,7 +419,7 @@
         boolean anyTooLarge = false;
         for(int i=start;i<end;i++) {
           final SegmentInfo info = infos.info(i);
-          anyTooLarge |= (size(info) >= maxMergeSize || info.docCount >= maxMergeDocs);
+          anyTooLarge |= (size(info) >= maxMergeSize || sizeDocs(info) >= maxMergeDocs);
         }
 
         if (!anyTooLarge) {



Mime
View raw message