lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sh...@apache.org
Subject svn commit: r1527361 - in /lucene/dev/trunk/lucene/core/src: java/org/apache/lucene/index/ test/org/apache/lucene/index/
Date Sun, 29 Sep 2013 17:37:02 GMT
Author: shaie
Date: Sun Sep 29 17:37:02 2013
New Revision: 1527361

URL: http://svn.apache.org/r1527361
Log:
LUCENE-5246: SegmentInfoPerCommit continues to list unneeded updatesFiles

Modified:
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java?rev=1527361&r1=1527360&r2=1527361&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java
(original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java
Sun Sep 29 17:37:02 2013
@@ -23,6 +23,7 @@ import java.util.Iterator;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.NoSuchElementException;
+import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.lucene.codecs.Codec;
@@ -351,6 +352,7 @@ class ReadersAndLiveDocs { // TODO (DVU_
     // We can write directly to the actual name (vs to a
     // .tmp & renaming it) because the file is not live
     // until segments file is written:
+    FieldInfos fieldInfos = null;
     boolean success = false;
     try {
       Codec codec = info.info.getCodec();
@@ -385,7 +387,7 @@ class ReadersAndLiveDocs { // TODO (DVU_
             builder.addOrUpdate(f, NumericDocValuesField.TYPE);
           }
           
-          final FieldInfos fieldInfos = builder.finish();
+          fieldInfos = builder.finish();
           final long nextFieldInfosGen = info.getNextFieldInfosGen();
           final String segmentSuffix = Long.toString(nextFieldInfosGen, Character.MAX_RADIX);
           final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info,
fieldInfos, null, IOContext.DEFAULT, segmentSuffix);
@@ -502,10 +504,25 @@ class ReadersAndLiveDocs { // TODO (DVU_
         copyUpdatesToMerging();
       }
       numericUpdates.clear();
+      
+      // create a new map, keeping only the gens that are in use
+      Map<Long,Set<String>> genUpdatesFiles = info.getUpdatesFiles();
+      Map<Long,Set<String>> newGenUpdatesFiles = new HashMap<Long,Set<String>>();
+      final long fieldInfosGen = info.getFieldInfosGen();
+      for (FieldInfo fi : fieldInfos) {
+        long dvGen = fi.getDocValuesGen();
+        if (dvGen != -1 && !newGenUpdatesFiles.containsKey(dvGen)) {
+          if (dvGen == fieldInfosGen) {
+            newGenUpdatesFiles.put(fieldInfosGen, trackingDir.getCreatedFiles());
+          } else {
+            newGenUpdatesFiles.put(dvGen, genUpdatesFiles.get(dvGen));
+          }
+        }
+      }
+      
+      info.setGenUpdatesFiles(newGenUpdatesFiles);
     }
 
-    info.addUpdatesFiles(trackingDir.getCreatedFiles());
-    
     return true;
   }
 

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java?rev=1527361&r1=1527360&r2=1527361&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java
(original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java
Sun Sep 29 17:37:02 2013
@@ -19,7 +19,11 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Set;
 
 import org.apache.lucene.store.Directory;
@@ -51,8 +55,8 @@ public class SegmentInfoPerCommit { // T
   // write
   private long nextWriteFieldInfosGen;
 
-  // Tracks the files with field updates
-  private Set<String> updatesFiles = new HashSet<String>();
+  // Track the per-generation updates files
+  private final Map<Long,Set<String>> genUpdatesFiles = new HashMap<Long,Set<String>>();
   
   private volatile long sizeInBytes = -1;
 
@@ -86,14 +90,15 @@ public class SegmentInfoPerCommit { // T
     }
   }
 
-  /** Returns the files which contains field updates. */
-  public Set<String> getUpdatesFiles() {
-    return new HashSet<String>(updatesFiles);
+  /** Returns the per generation updates files. */
+  public Map<Long,Set<String>> getUpdatesFiles() {
+    return Collections.unmodifiableMap(genUpdatesFiles);
   }
   
-  /** Called when we succeed in writing field updates. */
-  public void addUpdatesFiles(Set<String> files) {
-    updatesFiles.addAll(files);
+  /** Sets the updates file names per generation. Does not deep clone the map. */
+  public void setGenUpdatesFiles(Map<Long,Set<String>> genUpdatesFiles) {
+    this.genUpdatesFiles.clear();
+    this.genUpdatesFiles.putAll(genUpdatesFiles);
   }
   
   /** Called when we succeed in writing deletes */
@@ -151,7 +156,9 @@ public class SegmentInfoPerCommit { // T
     info.getCodec().liveDocsFormat().files(this, files);
 
     // Must separately add any field updates files
-    files.addAll(updatesFiles);
+    for (Set<String> updateFiles : genUpdatesFiles.values()) {
+      files.addAll(updateFiles);
+    }
     
     return files;
   }
@@ -248,7 +255,10 @@ public class SegmentInfoPerCommit { // T
     other.nextWriteDelGen = nextWriteDelGen;
     other.nextWriteFieldInfosGen = nextWriteFieldInfosGen;
     
-    other.updatesFiles.addAll(updatesFiles);
+    // deep clone
+    for (Entry<Long,Set<String>> e : genUpdatesFiles.entrySet()) {
+      other.genUpdatesFiles.put(e.getKey(), new HashSet<String>(e.getValue()));
+    }
     
     return other;
   }

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java?rev=1527361&r1=1527360&r2=1527361&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java Sun Sep
29 17:37:02 2013
@@ -28,6 +28,7 @@ import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Set;
 
 import org.apache.lucene.codecs.Codec;
@@ -349,7 +350,17 @@ public final class SegmentInfos implemen
         }
         SegmentInfoPerCommit siPerCommit = new SegmentInfoPerCommit(info, delCount, delGen,
fieldInfosGen);
         if (format >= VERSION_46) {
-          siPerCommit.addUpdatesFiles(input.readStringSet());
+          int numGensUpdatesFiles = input.readInt();
+          final Map<Long,Set<String>> genUpdatesFiles;
+          if (numGensUpdatesFiles == 0) {
+            genUpdatesFiles = Collections.emptyMap();
+          } else {
+            genUpdatesFiles = new HashMap<Long,Set<String>>(numGensUpdatesFiles);
+            for (int i = 0; i < numGensUpdatesFiles; i++) {
+              genUpdatesFiles.put(input.readLong(), input.readStringSet());
+            }
+          }
+          siPerCommit.setGenUpdatesFiles(genUpdatesFiles);
         }
         add(siPerCommit);
       }
@@ -420,7 +431,12 @@ public final class SegmentInfos implemen
         segnOutput.writeLong(siPerCommit.getDelGen());
         segnOutput.writeInt(siPerCommit.getDelCount());
         segnOutput.writeLong(siPerCommit.getFieldInfosGen());
-        segnOutput.writeStringSet(siPerCommit.getUpdatesFiles());
+        final Map<Long,Set<String>> genUpdatesFiles = siPerCommit.getUpdatesFiles();
+        segnOutput.writeInt(genUpdatesFiles.size());
+        for (Entry<Long,Set<String>> e : genUpdatesFiles.entrySet()) {
+          segnOutput.writeLong(e.getKey());
+          segnOutput.writeStringSet(e.getValue());
+        }
         assert si.dir == directory;
 
         assert siPerCommit.getDelCount() <= si.getDocCount();

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java?rev=1527361&r1=1527360&r2=1527361&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java Sun Sep
29 17:37:02 2013
@@ -110,7 +110,7 @@ public final class SegmentReader extends
         final DocValuesFormat dvFormat = codec.docValuesFormat();
         // initialize the per generation numericDVProducers and put the correct
         // DVProducer for each field
-        final Map<Long,List<FieldInfo>> genInfos = getGenInfos(si);
+        final Map<Long,List<FieldInfo>> genInfos = getGenInfos();
         
 //        System.out.println("[" + Thread.currentThread().getName() + "] SR.init: new reader:
" + si + "; gens=" + genInfos.keySet());
 
@@ -178,7 +178,7 @@ public final class SegmentReader extends
         final Directory dir = core.cfsReader != null ? core.cfsReader : si.info.dir;
         
         final DocValuesFormat dvFormat = codec.docValuesFormat();
-        final Map<Long,List<FieldInfo>> genInfos = getGenInfos(si);
+        final Map<Long,List<FieldInfo>> genInfos = getGenInfos();
         
         for (Entry<Long,List<FieldInfo>> e : genInfos.entrySet()) {
           Long gen = e.getKey();
@@ -244,7 +244,7 @@ public final class SegmentReader extends
   }
   
   // returns a gen->List<FieldInfo> mapping. Fields without DV updates have gen=-1
-  private Map<Long,List<FieldInfo>> getGenInfos(SegmentInfoPerCommit si) {
+  private Map<Long,List<FieldInfo>> getGenInfos() {
     final Map<Long,List<FieldInfo>> genInfos = new HashMap<Long,List<FieldInfo>>();
     for (FieldInfo fi : fieldInfos) {
       if (fi.getDocValuesType() == null) {

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java?rev=1527361&r1=1527360&r2=1527361&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java
(original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java
Sun Sep 29 17:37:02 2013
@@ -5,6 +5,7 @@ import java.util.HashSet;
 import java.util.Random;
 import java.util.Set;
 import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
@@ -28,8 +29,8 @@ import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util._TestUtil;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.lucene.util._TestUtil;
 import org.junit.Test;
 
 import com.carrotsearch.randomizedtesting.generators.RandomPicks;
@@ -917,10 +918,11 @@ public class TestNumericDocValuesUpdates
     final IndexWriter writer = new IndexWriter(dir, conf);
     
     // create index
-    final int numThreads = atLeast(3);
+    final int numThreads = _TestUtil.nextInt(random(), 3, 6);
     final int numDocs = atLeast(2000);
     for (int i = 0; i < numDocs; i++) {
       Document doc = new Document();
+      doc.add(new StringField("id", "doc" + i, Store.NO));
       double group = random().nextDouble();
       String g;
       if (group < 0.1) g = "g0";
@@ -937,20 +939,21 @@ public class TestNumericDocValuesUpdates
     }
     
     final CountDownLatch done = new CountDownLatch(numThreads);
+    final AtomicInteger numUpdates = new AtomicInteger(atLeast(100));
     
     // same thread updates a field as well as reopens
     Thread[] threads = new Thread[numThreads];
     for (int i = 0; i < threads.length; i++) {
       final String f = "f" + i;
       final String cf = "cf" + i;
-      final int numThreadUpdates = atLeast(40);
       threads[i] = new Thread("UpdateThread-" + i) {
         @Override
         public void run() {
+          DirectoryReader reader = null;
+          boolean success = false;
           try {
             Random random = random();
-            int numUpdates = numThreadUpdates;
-            while (numUpdates-- > 0) {
+            while (numUpdates.getAndDecrement() > 0) {
               double group = random.nextDouble();
               Term t;
               if (group < 0.1) t = new Term("updKey", "g0");
@@ -965,20 +968,43 @@ public class TestNumericDocValuesUpdates
               if (random.nextDouble() < 0.2) {
                 // delete a random document
                 int doc = random.nextInt(numDocs);
+//                System.out.println("[" + Thread.currentThread().getName() + "] deleteDoc=doc"
+ doc);
                 writer.deleteDocuments(new Term("id", "doc" + doc));
               }
-              
-              if (random.nextDouble() < 0.1) {
-                writer.commit(); // rarely commit
+  
+              if (random.nextDouble() < 0.05) { // commit every 20 updates on average
+//                  System.out.println("[" + Thread.currentThread().getName() + "] commit");
+                writer.commit();
               }
               
-              if (random.nextDouble() < 0.3) { // obtain NRT reader (apply updates)
-                DirectoryReader.open(writer, true).close();
+              if (random.nextDouble() < 0.1) { // reopen NRT reader (apply updates), on
average once every 10 updates
+                if (reader == null) {
+//                  System.out.println("[" + Thread.currentThread().getName() + "] open NRT");
+                  reader = DirectoryReader.open(writer, true);
+                } else {
+//                  System.out.println("[" + Thread.currentThread().getName() + "] reopen
NRT");
+                  DirectoryReader r2 = DirectoryReader.openIfChanged(reader, writer, true);
+                  if (r2 != null) {
+                    reader.close();
+                    reader = r2;
+                  }
+                }
               }
             }
+//            System.out.println("[" + Thread.currentThread().getName() + "] DONE");
+            success = true;
           } catch (IOException e) {
             throw new RuntimeException(e);
           } finally {
+            if (reader != null) {
+              try {
+                reader.close();
+              } catch (IOException e) {
+                if (success) { // suppress this exception only if there was another exception
+                  throw new RuntimeException(e);
+                }
+              }
+            }
             done.countDown();
           }
         }
@@ -1152,5 +1178,31 @@ public class TestNumericDocValuesUpdates
     
     IOUtils.close(dir1, dir2);
   }
+
+  @Test
+  public void testDeleteUnusedUpdatesFiles() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    IndexWriter writer = new IndexWriter(dir, conf);
+    
+    Document doc = new Document();
+    doc.add(new StringField("id", "d0", Store.NO));
+    doc.add(new NumericDocValuesField("f", 1L));
+    writer.addDocument(doc);
+
+    // create _0_1.fnm
+    writer.updateNumericDocValue(new Term("id", "d0"), "f", 2L);
+    writer.commit();
+
+    // create _0_2.fnm, and _0_1.fnm should be deleted
+    writer.updateNumericDocValue(new Term("id", "d0"), "f", 2L);
+    writer.commit();
+
+    assertTrue(dir.fileExists("_0_2.fnm"));
+    assertFalse("old generation field infos file should not exist in the directory: _0_1.fnm",
dir.fileExists("_0_1.fnm"));
+    
+    writer.close();
+    dir.close();
+  }
   
 }



Mime
View raw message