lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r1619620 - in /lucene/dev/trunk/lucene: ./ codecs/src/java/org/apache/lucene/codecs/simpletext/ core/src/java/org/apache/lucene/codecs/lucene46/ core/src/java/org/apache/lucene/index/ core/src/java/org/apache/lucene/util/ core/src/test/org/...
Date Thu, 21 Aug 2014 22:58:30 GMT
Author: mikemccand
Date: Thu Aug 21 22:58:30 2014
New Revision: 1619620

URL: http://svn.apache.org/r1619620
Log:
LUCENE-5895: add id for each segment and commit to aid replication

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java
    lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/StringHelper.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
    lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1619620&r1=1619619&r2=1619620&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Aug 21 22:58:30 2014
@@ -98,6 +98,11 @@ Other
 ======================= Lucene 4.11.0 ======================
 (No Changes)
 
+New Features
+
+* LUCENE-5895: Lucene now stores a unique id per-segment and per-commit to aid
+  in accurate replication of index files (Robert Muir, Mike McCandless)
+
 ======================= Lucene 4.10.0 ======================
 
 New Features

Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java?rev=1619620&r1=1619619&r2=1619620&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java
(original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java
Thu Aug 21 22:58:30 2014
@@ -17,15 +17,6 @@ package org.apache.lucene.codecs.simplet
  * limitations under the License.
  */
 
-import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_KEY;
-import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_VALUE;
-import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DOCCOUNT;
-import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_FILE;
-import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_DIAG;
-import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_FILES;
-import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_USECOMPOUND;
-import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_VERSION;
-
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.util.HashMap;
@@ -44,6 +35,16 @@ import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.Version;
 
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_KEY;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_VALUE;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DOCCOUNT;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_FILE;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_DIAG;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_FILES;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_USECOMPOUND;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_ID;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_VERSION;
+
 /**
  * reads plaintext segments files
  * <p>
@@ -99,10 +100,14 @@ public class SimpleTextSegmentInfoReader
         files.add(fileName);
       }
       
+      SimpleTextUtil.readLine(input, scratch);
+      assert StringHelper.startsWith(scratch.get(), SI_ID);
+      final String id = readString(SI_ID.length, scratch);
+
       SimpleTextUtil.checkFooter(input);
 
-      SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount, 
-                                         isCompoundFile, null, diagnostics);
+      SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount,
+                                         isCompoundFile, null, diagnostics, id);
       info.setFiles(files);
       success = true;
       return info;

Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java?rev=1619620&r1=1619619&r2=1619620&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java
(original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java
Thu Aug 21 22:58:30 2014
@@ -48,6 +48,7 @@ public class SimpleTextSegmentInfoWriter
   final static BytesRef SI_DIAG_VALUE       = new BytesRef("      value ");
   final static BytesRef SI_NUM_FILES        = new BytesRef("    files ");
   final static BytesRef SI_FILE             = new BytesRef("      file ");
+  final static BytesRef SI_ID               = new BytesRef("    id ");
   
   @Override
   public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws
IOException {
@@ -104,6 +105,10 @@ public class SimpleTextSegmentInfoWriter
           SimpleTextUtil.writeNewline(output);
         }
       }
+
+      SimpleTextUtil.write(output, SI_ID);
+      SimpleTextUtil.write(output, si.getId(), scratch);
+      SimpleTextUtil.writeNewline(output);
       
       SimpleTextUtil.writeChecksum(output, scratch);
       success = true;

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java?rev=1619620&r1=1619619&r2=1619620&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java
(original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java
Thu Aug 21 22:58:30 2014
@@ -31,7 +31,7 @@ import org.apache.lucene.store.DataOutpu
  * <p>
  * Files:
  * <ul>
- *   <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics,
Files, Footer
+ *   <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics,
Files, Id, Footer
  * </ul>
  * </p>
  * Data types:
@@ -44,6 +44,7 @@ import org.apache.lucene.store.DataOutpu
  *   <li>Diagnostics --&gt; {@link DataOutput#writeStringStringMap Map&lt;String,String&gt;}</li>
  *   <li>IsCompoundFile --&gt; {@link DataOutput#writeByte Int8}</li>
  *   <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
+ *   <li>Id --&gt; {@link DataOutput#writeString String}</li>
  * </ul>
  * </p>
  * Field Descriptions:
@@ -88,5 +89,6 @@ public class Lucene46SegmentInfoFormat e
   static final String CODEC_NAME = "Lucene46SegmentInfo";
   static final int VERSION_START = 0;
   static final int VERSION_CHECKSUM = 1;
-  static final int VERSION_CURRENT = VERSION_CHECKSUM;
+  static final int VERSION_ID = 2;
+  static final int VERSION_CURRENT = VERSION_ID;
 }

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java?rev=1619620&r1=1619619&r2=1619620&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java
(original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java
Thu Aug 21 22:58:30 2014
@@ -62,13 +62,20 @@ public class Lucene46SegmentInfoReader e
       final Map<String,String> diagnostics = input.readStringStringMap();
       final Set<String> files = input.readStringSet();
       
+      String id;
+      if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_ID) {
+        id = input.readString();
+      } else {
+        id = null;
+      }
+
       if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_CHECKSUM) {
         CodecUtil.checkFooter(input);
       } else {
         CodecUtil.checkEOF(input);
       }
 
-      final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile,
null, diagnostics);
+      final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile,
null, diagnostics, id);
       si.setFiles(files);
 
       success = true;

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java?rev=1619620&r1=1619619&r2=1619620&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java
(original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java
Thu Aug 21 22:58:30 2014
@@ -59,6 +59,7 @@ public class Lucene46SegmentInfoWriter e
       output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
       output.writeStringStringMap(si.getDiagnostics());
       output.writeStringSet(si.files());
+      output.writeString(si.getId());
       CodecUtil.writeFooter(output);
       success = true;
     } finally {

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1619620&r1=1619619&r2=1619620&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Thu Aug
21 22:58:30 2014
@@ -484,7 +484,7 @@ public class CheckIndex {
     }
 
     msg(infoStream, "Segments file=" + segmentsFileName + " numSegments=" + numSegments
-        + " " + versionString + " format=" + sFormat + userDataString);
+        + " " + versionString + " id=" + sis.getId() + " format=" + sFormat + userDataString);
 
     if (onlySegments != null) {
       result.partial = true;
@@ -535,6 +535,7 @@ public class CheckIndex {
 
       try {
         msg(infoStream, "    version=" + (version == null ? "3.0" : version));
+        msg(infoStream, "    id=" + info.info.getId());
         final Codec codec = info.info.getCodec();
         msg(infoStream, "    codec=" + codec);
         segInfoStat.codec = codec;

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java?rev=1619620&r1=1619619&r2=1619620&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
(original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
Thu Aug 21 22:58:30 2014
@@ -39,6 +39,7 @@ import org.apache.lucene.util.InfoStream
 import org.apache.lucene.util.IntBlockPool;
 import org.apache.lucene.util.MutableBits;
 import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.Version;
 
 import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK;
@@ -178,7 +179,7 @@ class DocumentsWriterPerThread {
     pendingUpdates.clear();
     deleteSlice = deleteQueue.newSlice();
    
-    segmentInfo = new SegmentInfo(directoryOrig, Version.LATEST, segmentName, -1, false,
codec, null);
+    segmentInfo = new SegmentInfo(directoryOrig, Version.LATEST, segmentName, -1, false,
codec, null, StringHelper.randomId());
     assert numDocsInRAM == 0;
     if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) {
       infoStream.message("DWPT", Thread.currentThread().getName() + " init seg=" + segmentName
+ " delQueue=" + deleteQueue);  

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java?rev=1619620&r1=1619619&r2=1619620&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java Thu Aug
21 22:58:30 2014
@@ -63,6 +63,7 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.Constants;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.InfoStream;
+import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.ThreadInterruptedException;
 import org.apache.lucene.util.Version;
 
@@ -2566,7 +2567,7 @@ public class IndexWriter implements Clos
       TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
 
       SegmentInfo info = new SegmentInfo(directory, Version.LATEST, mergedName, -1,
-                                         false, codec, null);
+                                         false, codec, null, StringHelper.randomId());
 
       SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir,
                                                MergeState.CheckAbort.NONE, globalFieldNumberMap,

@@ -2667,7 +2668,7 @@ public class IndexWriter implements Clos
     // Same SI as before but we change directory and name
     SegmentInfo newInfo = new SegmentInfo(directory, info.info.getVersion(), segName, info.info.getDocCount(),
                                           info.info.getUseCompoundFile(), info.info.getCodec(),

-                                          info.info.getDiagnostics());
+                                          info.info.getDiagnostics(), StringHelper.randomId());
     SegmentCommitInfo newInfoPerCommit = new SegmentCommitInfo(newInfo,
         info.getDelCount(), info.getDelGen(), info.getFieldInfosGen(),
         info.getDocValuesGen());
@@ -3789,7 +3790,7 @@ public class IndexWriter implements Clos
     // ConcurrentMergePolicy we keep deterministic segment
     // names.
     final String mergeSegmentName = newSegmentName();
-    SegmentInfo si = new SegmentInfo(directory, Version.LATEST, mergeSegmentName, -1, false,
codec, null);
+    SegmentInfo si = new SegmentInfo(directory, Version.LATEST, mergeSegmentName, -1, false,
codec, null, StringHelper.randomId());
     Map<String,String> details = new HashMap<>();
     details.put("mergeMaxNumSegments", "" + merge.maxNumSegments);
     details.put("mergeFactor", Integer.toString(merge.segments.size()));

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java?rev=1619620&r1=1619619&r2=1619620&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java Thu Aug
21 22:58:30 2014
@@ -27,7 +27,6 @@ import java.util.regex.Matcher;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.TrackingDirectoryWrapper;
-import org.apache.lucene.util.Constants;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.Version;
 
@@ -58,10 +57,13 @@ public final class SegmentInfo {
 
   private boolean isCompoundFile;
 
+  /** Id that uniquely identifies this segment. */
+  private final String id;
+
   private Codec codec;
 
   private Map<String,String> diagnostics;
-  
+
   // Tracks the Lucene version this segment was created with, since 3.1. Null
   // indicates an older than 3.0 index, and it's used to detect a too old index.
   // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and
@@ -80,12 +82,22 @@ public final class SegmentInfo {
   }
 
   /**
+   * Construct a new complete SegmentInfo instance from
+   * input, without an id ({@link #getId} will return null).
+   */
+  public SegmentInfo(Directory dir, Version version, String name, int docCount,
+                     boolean isCompoundFile, Codec codec, Map<String,String> diagnostics)
{
+    this(dir, version, name, docCount, isCompoundFile, codec, diagnostics, null);
+  }
+
+  /**
    * Construct a new complete SegmentInfo instance from input.
    * <p>Note: this is public only to allow access from
    * the codecs package.</p>
    */
   public SegmentInfo(Directory dir, Version version, String name, int docCount,
-                     boolean isCompoundFile, Codec codec, Map<String,String> diagnostics)
{
+                     boolean isCompoundFile, Codec codec, Map<String,String> diagnostics,
+                     String id) {
     assert !(dir instanceof TrackingDirectoryWrapper);
     this.dir = dir;
     this.version = version;
@@ -94,6 +106,7 @@ public final class SegmentInfo {
     this.isCompoundFile = isCompoundFile;
     this.codec = codec;
     this.diagnostics = diagnostics;
+    this.id = id;
   }
 
   /**
@@ -212,6 +225,11 @@ public final class SegmentInfo {
     return version;
   }
 
+  /** Return the id that uniquely identifies this segment. */
+  public String getId() {
+    return id;
+  }
+
   private Set<String> setFiles;
 
   /** Sets the files written for this segment. */

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java?rev=1619620&r1=1619619&r2=1619620&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java Thu Aug
21 22:58:30 2014
@@ -27,8 +27,8 @@ import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
-import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Map;
 import java.util.Set;
 
 import org.apache.lucene.codecs.Codec;
@@ -43,6 +43,7 @@ import org.apache.lucene.store.IOContext
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.NoSuchDirectoryException;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.StringHelper;
 
 /**
  * A collection of segmentInfo objects with methods for operating on those
@@ -137,6 +138,9 @@ public final class SegmentInfos implemen
   /** The file format version for the segments_N codec header, since 4.9+ */
   public static final int VERSION_49 = 3;
 
+  /** The file format version for the segments_N codec header, since 4.10+ */
+  public static final int VERSION_410 = 4;
+
   // Used for the segments.gen file only!
   // Whenever you add a new format, make it 1 smaller (negative version logic)!
   private static final int FORMAT_SEGMENTS_GEN_47 = -2;
@@ -167,6 +171,9 @@ public final class SegmentInfos implemen
    */
   private static PrintStream infoStream = null;
 
+  /** Id for this commit; only written starting with Lucene 4.10 */
+  private String id;
+
   /** Sole constructor. Typically you call this and then
    *  use {@link #read(Directory) or
    *  #read(Directory,String)} to populate each {@link
@@ -317,6 +324,12 @@ public final class SegmentInfos implemen
                                                  nextGeneration);
   }
 
+  /** Since Lucene 4.10, every commit (segments_N) writes a unique id.  This will
+   *  return that id, or null if this commit was pre-4.10. */
+  public String getId() {
+    return id;
+  }
+
   /**
    * Read a particular segmentFileName.  Note that this may
    * throw an IOException if a commit is in process.
@@ -345,7 +358,7 @@ public final class SegmentInfos implemen
         throw new IndexFormatTooOldException(input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
       }
       // 4.0+
-      int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_49);
+      int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_410);
       version = input.readLong();
       counter = input.readInt();
       int numSegments = input.readInt();
@@ -410,6 +423,9 @@ public final class SegmentInfos implemen
         add(siPerCommit);
       }
       userData = input.readStringStringMap();
+      if (format >= VERSION_410) {
+        id = input.readString();
+      }
 
       if (format >= VERSION_48) {
         CodecUtil.checkFooter(input);
@@ -470,7 +486,7 @@ public final class SegmentInfos implemen
 
     try {
       segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT);
-      CodecUtil.writeHeader(segnOutput, "segments", VERSION_49);
+      CodecUtil.writeHeader(segnOutput, "segments", VERSION_410);
       segnOutput.writeLong(version); 
       segnOutput.writeInt(counter); // write counter
       segnOutput.writeInt(size()); // write infos
@@ -496,6 +512,7 @@ public final class SegmentInfos implemen
         assert si.dir == directory;
       }
       segnOutput.writeStringStringMap(userData);
+      segnOutput.writeString(StringHelper.randomId());
       pendingSegnOutput = segnOutput;
       success = true;
     } finally {

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/StringHelper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/StringHelper.java?rev=1619620&r1=1619619&r2=1619620&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/StringHelper.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/StringHelper.java Thu Aug
21 22:58:30 2014
@@ -17,8 +17,9 @@ package org.apache.lucene.util;
  * limitations under the License.
  */
 
-import java.util.Comparator;
-import java.util.StringTokenizer;
+import java.math.BigInteger;
+import java.util.Arrays;
+import java.util.Properties;
 
 /**
  * Methods for manipulating strings.
@@ -228,4 +229,88 @@ public abstract class StringHelper {
   public static int murmurhash3_x86_32(BytesRef bytes, int seed) {
     return murmurhash3_x86_32(bytes.bytes, bytes.offset, bytes.length, seed);
   }
+
+  // Holds 128 bit unsigned value:
+  private static BigInteger nextId;
+  private static final BigInteger idMask;
+  private static final Object idLock = new Object();
+  private static final String idPad = "00000000000000000000000000000000";
+
+  static {
+    byte[] maskBytes = new byte[16];
+    Arrays.fill(maskBytes, (byte) 0xff);
+    idMask = new BigInteger(maskBytes);
+    String prop = System.getProperty("tests.seed");
+
+    // State for xorshift128:
+    long x0;
+    long x1;
+
+    long seed;
+    if (prop != null) {
+      // So if there is a test failure that somehow relied on this id,
+      // we remain reproducible based on the test seed:
+      if (prop.length() > 8) {
+        prop = prop.substring(prop.length()-8);
+      }
+      x0 = Long.parseLong(prop, 16);
+      x1 = x0;
+    } else {
+      // "Ghetto randomness" from 3 different sources:
+      x0 = System.nanoTime();
+      x1 = StringHelper.class.hashCode() << 32;
+      StringBuilder sb = new StringBuilder();
+      // Properties can vary across JVM instances:
+      Properties p = System.getProperties();
+      for (String s: p.stringPropertyNames()) {
+        sb.append(s);
+        sb.append(p.getProperty(s));
+      }
+      x1 |= sb.toString().hashCode();
+      // TODO: maybe read from /dev/urandom when it's available?
+    }
+
+    // Use a few iterations of xorshift128 to scatter the seed
+    // in case multiple Lucene instances start up "near" the same
+    // nanoTime, since we use ++ (mod 2^128) for full period cycle:
+    for(int i=0;i<10;i++) {
+      long s1 = x0;
+      long s0 = x1;
+      x0 = s0;
+      s1 ^= s1 << 23; // a
+      x1 = s1 ^ s0 ^ (s1 >>> 17) ^ (s0 >>> 26); // b, c
+    }
+
+    // Concatenate bits of x0 and x1, as unsigned 128 bit integer:
+    nextId = new BigInteger(1, BigInteger.valueOf(x0).shiftLeft(64).or(BigInteger.valueOf(x1)).toByteArray());
+  }
+
+  /** Generates a non-cryptographic globally unique id. */
+  public static String randomId() {
+
+    // NOTE: we don't use Java's UUID.randomUUID() implementation here because:
+    //
+    //   * It's overkill for our usage: it tries to be cryptographically
+    //     secure, whereas for this use we don't care if someone can
+    //     guess the IDs.
+    //
+    //   * It uses SecureRandom, which on Linux can easily take a long time
+    //     (I saw ~ 10 seconds just running a Lucene test) when entropy
+    //     harvesting is falling behind.
+    //
+    //   * It loses a few (6) bits to version and variant and it's not clear
+    //     what impact that has on the period, whereas the simple ++ (mod 2^128)
+    //     we use here is guaranteed to have the full period.
+
+    String id;
+    synchronized(idLock) {
+      id = nextId.toString(16);
+      nextId = nextId.add(BigInteger.ONE).and(idMask);
+    }
+
+    assert id.length() <= 32: "id=" + id;
+    id = idPad.substring(id.length()) + id;
+
+    return id;
+  }
 }

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1619620&r1=1619619&r2=1619620&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java Thu
Aug 21 22:58:30 2014
@@ -81,11 +81,12 @@ import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.InfoStream;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.SetOnce;
+import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.TestUtil;
 import org.apache.lucene.util.ThreadInterruptedException;
 import org.apache.lucene.util.automaton.Automata;
-import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.junit.Test;
 
 public class TestIndexWriter extends LuceneTestCase {
@@ -2819,4 +2820,55 @@ public class TestIndexWriter extends Luc
     iw.close();
     dir.close();
   }
+
+  // LUCENE-5895:
+
+  /** Make sure we see ids per segment and per commit. */
+  public void testIds() throws Exception {
+    Directory d = newDirectory();
+    IndexWriter w = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random())));
+    w.addDocument(new Document());
+    w.close();
+    
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(d);
+    String id1 = sis.getId();
+    assertNotNull(id1);
+    
+    String id2 = sis.info(0).info.getId();
+    if (defaultCodecSupportsSegmentIds()) {
+      assertNotNull(id2);
+    } else {
+      assertNull(id2);
+    }
+
+    // Make sure CheckIndex includes id output:
+    ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
+    CheckIndex checker = new CheckIndex(d);
+    checker.setCrossCheckTermVectors(false);
+    checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8), false);
+    CheckIndex.Status indexStatus = checker.checkIndex(null);
+    String s = bos.toString(IOUtils.UTF_8);
+    // Make sure CheckIndex didn't fail
+    assertTrue(s, indexStatus != null && indexStatus.clean);
+
+    // Commit id is always stored:
+    assertTrue("missing id=" + id1 + " in:\n" + s, s.contains("id=" + id1));
+
+    // Per-segment id may or may not be stored depending on the codec:
+    if (defaultCodecSupportsSegmentIds()) {
+      assertTrue("missing id=" + id2 + " in:\n" + s, s.contains("id=" + id2));
+    } else {
+      assertTrue("missing id=null in:\n" + s, s.contains("id=null"));
+    }
+    d.close();
+
+    Set<String> ids = new HashSet<>();
+    for(int i=0;i<100000;i++) {
+      String id = StringHelper.randomId();
+      assertFalse("id=" + id + " i=" + i, ids.contains(id));
+      ids.add(id);
+    }
+  }
 }
+

Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java?rev=1619620&r1=1619619&r2=1619620&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
(original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
Thu Aug 21 22:58:30 2014
@@ -55,6 +55,9 @@ import java.util.logging.Logger;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.SegmentInfoFormat;
+import org.apache.lucene.codecs.lucene46.Lucene46SegmentInfoFormat;
+import org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoFormat;
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
@@ -1736,6 +1739,12 @@ public abstract class LuceneTestCase ext
     return true;
   }
 
+  /** Returns true if the codec "supports" writing segment and commit ids. */
+  public static boolean defaultCodecSupportsSegmentIds() {
+    SegmentInfoFormat siFormat = Codec.getDefault().segmentInfoFormat();
+    return siFormat instanceof SimpleTextSegmentInfoFormat || siFormat instanceof Lucene46SegmentInfoFormat;
+  }
+
   public void assertReaderEquals(String info, IndexReader leftReader, IndexReader rightReader)
throws IOException {
     assertReaderStatisticsEquals(info, leftReader, rightReader);
     assertFieldsEquals(info, leftReader, MultiFields.getFields(leftReader), MultiFields.getFields(rightReader),
true);



Mime
View raw message