lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r553236 [5/6] - in /lucene/java/trunk: ./ contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/ docs/ src/java/org/apache/lucene/analysis/ src/java/org/apache/lucene/index/ src/java/org/apache/lucene/store/ src/site/src/documentati...
Date Wed, 04 Jul 2007 15:16:40 GMT
Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfo.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfo.java?view=diff&rev=553236&r1=553235&r2=553236
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfo.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfo.java Wed Jul  4 08:16:38 2007
@@ -43,4 +43,9 @@
     this.omitNorms = omitNorms;
     this.storePayloads = storePayloads;
   }
+
+  public Object clone() {
+    return new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector,
+                         storeOffsetWithTermVector, omitNorms, storePayloads);
+  }
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfos.java?view=diff&rev=553236&r1=553235&r2=553236
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfos.java Wed Jul  4 08:16:38 2007
@@ -62,6 +62,20 @@
     }
   }
 
+  /**
+   * Returns a deep clone of this FieldInfos instance.
+   */
+  public Object clone() {
+    FieldInfos fis = new FieldInfos();
+    final int numField = byNumber.size();
+    for(int i=0;i<numField;i++) {
+      FieldInfo fi = (FieldInfo) ((FieldInfo) byNumber.get(i)).clone();
+      fis.byNumber.add(fi);
+      fis.byName.put(fi.name, fi);
+    }
+    return fis;
+  }
+
   /** Adds field info for a Document. */
   public void add(Document doc) {
     List fields = doc.getFields();

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java?view=diff&rev=553236&r1=553235&r2=553236
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java Wed Jul  4 08:16:38 2007
@@ -51,19 +51,39 @@
   private int size;
   private boolean closed;
 
+  // The docID offset where our docs begin in the index
+  // file.  This will be 0 if we have our own private file.
+  private int docStoreOffset;
+
   private ThreadLocal fieldsStreamTL = new ThreadLocal();
 
   FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
-    this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE);
+    this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE, -1, 0);
   }
 
   FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize) throws IOException {
+    this(d, segment, fn, readBufferSize, -1, 0);
+  }
+
+  FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) throws IOException {
     fieldInfos = fn;
 
     cloneableFieldsStream = d.openInput(segment + ".fdt", readBufferSize);
     fieldsStream = (IndexInput)cloneableFieldsStream.clone();
     indexStream = d.openInput(segment + ".fdx", readBufferSize);
-    size = (int) (indexStream.length() / 8);
+
+    if (docStoreOffset != -1) {
+      // We read only a slice out of this shared fields file
+      this.docStoreOffset = docStoreOffset;
+      this.size = size;
+
+      // Verify the file is long enough to hold all of our
+      // docs
+      assert ((int) (indexStream.length()/8)) >= size + this.docStoreOffset;
+    } else {
+      this.docStoreOffset = 0;
+      this.size = (int) (indexStream.length() / 8);
+    }
   }
 
   /**
@@ -100,7 +120,7 @@
   }
 
   final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
-    indexStream.seek(n * 8L);
+    indexStream.seek((n + docStoreOffset) * 8L);
     long position = indexStream.readLong();
     fieldsStream.seek(position);
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldsWriter.java?view=diff&rev=553236&r1=553235&r2=553236
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FieldsWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldsWriter.java Wed Jul  4 08:16:38 2007
@@ -24,6 +24,7 @@
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.store.IndexOutput;
 
 final class FieldsWriter
@@ -38,15 +39,92 @@
 
     private IndexOutput indexStream;
 
+    private boolean doClose;
+
     FieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException {
         fieldInfos = fn;
         fieldsStream = d.createOutput(segment + ".fdt");
         indexStream = d.createOutput(segment + ".fdx");
+        doClose = true;
+    }
+
+    FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn) throws IOException {
+        fieldInfos = fn;
+        fieldsStream = fdt;
+        indexStream = fdx;
+        doClose = false;
+    }
+
+    // Writes the contents of buffer into the fields stream
+    // and adds a new entry for this document into the index
+    // stream.  This assumes the buffer was already written
+    // in the correct fields format.
+    void flushDocument(RAMOutputStream buffer) throws IOException {
+      indexStream.writeLong(fieldsStream.getFilePointer());
+      buffer.writeTo(fieldsStream);
+    }
+
+    void flush() throws IOException {
+      indexStream.flush();
+      fieldsStream.flush();
     }
 
     final void close() throws IOException {
+      if (doClose) {
         fieldsStream.close();
         indexStream.close();
+      }
+    }
+
+    final void writeField(FieldInfo fi, Fieldable field) throws IOException {
+      // if the field is an instance of FieldsReader.FieldForMerge, we're in merge mode
+      // and field.binaryValue() already returns the compressed value for a field
+      // with isCompressed()==true, so we disable compression in that case
+      boolean disableCompression = (field instanceof FieldsReader.FieldForMerge);
+      fieldsStream.writeVInt(fi.number);
+      byte bits = 0;
+      if (field.isTokenized())
+        bits |= FieldsWriter.FIELD_IS_TOKENIZED;
+      if (field.isBinary())
+        bits |= FieldsWriter.FIELD_IS_BINARY;
+      if (field.isCompressed())
+        bits |= FieldsWriter.FIELD_IS_COMPRESSED;
+                
+      fieldsStream.writeByte(bits);
+                
+      if (field.isCompressed()) {
+        // compression is enabled for the current field
+        byte[] data = null;
+                  
+        if (disableCompression) {
+          // optimized case for merging, the data
+          // is already compressed
+          data = field.binaryValue();
+        } else {
+          // check if it is a binary field
+          if (field.isBinary()) {
+            data = compress(field.binaryValue());
+          }
+          else {
+            data = compress(field.stringValue().getBytes("UTF-8"));
+          }
+        }
+        final int len = data.length;
+        fieldsStream.writeVInt(len);
+        fieldsStream.writeBytes(data, len);
+      }
+      else {
+        // compression is disabled for the current field
+        if (field.isBinary()) {
+          byte[] data = field.binaryValue();
+          final int len = data.length;
+          fieldsStream.writeVInt(len);
+          fieldsStream.writeBytes(data, len);
+        }
+        else {
+          fieldsStream.writeString(field.stringValue());
+        }
+      }
     }
 
     final void addDocument(Document doc) throws IOException {
@@ -64,57 +142,8 @@
         fieldIterator = doc.getFields().iterator();
         while (fieldIterator.hasNext()) {
             Fieldable field = (Fieldable) fieldIterator.next();
-            // if the field as an instanceof FieldsReader.FieldForMerge, we're in merge mode
-            // and field.binaryValue() already returns the compressed value for a field
-            // with isCompressed()==true, so we disable compression in that case
-            boolean disableCompression = (field instanceof FieldsReader.FieldForMerge);
-            if (field.isStored()) {
-                fieldsStream.writeVInt(fieldInfos.fieldNumber(field.name()));
-
-                byte bits = 0;
-                if (field.isTokenized())
-                    bits |= FieldsWriter.FIELD_IS_TOKENIZED;
-                if (field.isBinary())
-                    bits |= FieldsWriter.FIELD_IS_BINARY;
-                if (field.isCompressed())
-                    bits |= FieldsWriter.FIELD_IS_COMPRESSED;
-                
-                fieldsStream.writeByte(bits);
-                
-                if (field.isCompressed()) {
-                  // compression is enabled for the current field
-                  byte[] data = null;
-                  
-                  if (disableCompression) {
-                      // optimized case for merging, the data
-                      // is already compressed
-                      data = field.binaryValue();
-                  } else {
-                      // check if it is a binary field
-                      if (field.isBinary()) {
-                        data = compress(field.binaryValue());
-                      }
-                      else {
-                        data = compress(field.stringValue().getBytes("UTF-8"));
-                      }
-                  }
-                  final int len = data.length;
-                  fieldsStream.writeVInt(len);
-                  fieldsStream.writeBytes(data, len);
-                }
-                else {
-                  // compression is disabled for the current field
-                  if (field.isBinary()) {
-                    byte[] data = field.binaryValue();
-                    final int len = data.length;
-                    fieldsStream.writeVInt(len);
-                    fieldsStream.writeBytes(data, len);
-                  }
-                  else {
-                    fieldsStream.writeString(field.stringValue());
-                  }
-                }
-            }
+            if (field.isStored())
+              writeField(fieldInfos.fieldInfo(field.name()), field);
         }
     }
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexFileDeleter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/IndexFileDeleter.java?view=diff&rev=553236&r1=553235&r2=553236
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexFileDeleter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexFileDeleter.java Wed Jul  4 08:16:38 2007
@@ -97,6 +97,7 @@
   private PrintStream infoStream;
   private Directory directory;
   private IndexDeletionPolicy policy;
+  private DocumentsWriter docWriter;
 
   void setInfoStream(PrintStream infoStream) {
     this.infoStream = infoStream;
@@ -116,10 +117,12 @@
    * @throws CorruptIndexException if the index is corrupt
    * @throws IOException if there is a low-level IO error
    */
-  public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, PrintStream infoStream)
+  public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, PrintStream infoStream, DocumentsWriter docWriter)
     throws CorruptIndexException, IOException {
 
+    this.docWriter = docWriter;
     this.infoStream = infoStream;
+
     this.policy = policy;
     this.directory = directory;
 
@@ -294,7 +297,7 @@
   public void checkpoint(SegmentInfos segmentInfos, boolean isCommit) throws IOException {
 
     if (infoStream != null) {
-      message("now checkpoint \"" + segmentInfos.getCurrentSegmentFileName() + "\" [isCommit = " + isCommit + "]");
+      message("now checkpoint \"" + segmentInfos.getCurrentSegmentFileName() + "\" [" + segmentInfos.size() + " segments " + "; isCommit = " + isCommit + "]");
     }
 
     // Try again now to delete any previously un-deletable
@@ -310,6 +313,8 @@
 
     // Incref the files:
     incRef(segmentInfos, isCommit);
+    if (docWriter != null)
+      incRef(docWriter.files());
 
     if (isCommit) {
       // Append to our commits list:
@@ -325,9 +330,8 @@
     // DecRef old files from the last checkpoint, if any:
     int size = lastFiles.size();
     if (size > 0) {
-      for(int i=0;i<size;i++) {
+      for(int i=0;i<size;i++)
         decRef((List) lastFiles.get(i));
-      }
       lastFiles.clear();
     }
 
@@ -340,6 +344,8 @@
           lastFiles.add(segmentInfo.files());
         }
       }
+      if (docWriter != null)
+        lastFiles.add(docWriter.files());
     }
   }
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexFileNames.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/IndexFileNames.java?view=diff&rev=553236&r1=553235&r2=553236
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexFileNames.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexFileNames.java Wed Jul  4 08:16:38 2007
@@ -38,18 +38,54 @@
   /** Extension of norms file */
   static final String NORMS_EXTENSION = "nrm";
 
+  /** Extension of freq postings file */
+  static final String FREQ_EXTENSION = "frq";
+
+  /** Extension of prox postings file */
+  static final String PROX_EXTENSION = "prx";
+
+  /** Extension of terms file */
+  static final String TERMS_EXTENSION = "tis";
+
+  /** Extension of terms index file */
+  static final String TERMS_INDEX_EXTENSION = "tii";
+
+  /** Extension of stored fields index file */
+  static final String FIELDS_INDEX_EXTENSION = "fdx";
+
+  /** Extension of stored fields file */
+  static final String FIELDS_EXTENSION = "fdt";
+
+  /** Extension of vectors fields file */
+  static final String VECTORS_FIELDS_EXTENSION = "tvf";
+
+  /** Extension of vectors documents file */
+  static final String VECTORS_DOCUMENTS_EXTENSION = "tvd";
+
+  /** Extension of vectors index file */
+  static final String VECTORS_INDEX_EXTENSION = "tvx";
+
   /** Extension of compound file */
   static final String COMPOUND_FILE_EXTENSION = "cfs";
 
+  /** Extension of compound file for doc store files */
+  static final String COMPOUND_FILE_STORE_EXTENSION = "cfx";
+
   /** Extension of deletes */
   static final String DELETES_EXTENSION = "del";
 
+  /** Extension of field infos */
+  static final String FIELD_INFOS_EXTENSION = "fnm";
+
   /** Extension of plain norms */
   static final String PLAIN_NORMS_EXTENSION = "f";
 
   /** Extension of separate norms */
   static final String SEPARATE_NORMS_EXTENSION = "s";
 
+  /** Extension of gen file */
+  static final String GEN_EXTENSION = "gen";
+
   /**
    * This array contains all filename extensions used by
    * Lucene's index files, with two exceptions, namely the
@@ -59,25 +95,72 @@
    * filename extension.
    */
   static final String INDEX_EXTENSIONS[] = new String[] {
-      "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del",
-      "tvx", "tvd", "tvf", "gen", "nrm" 
+    COMPOUND_FILE_EXTENSION,
+    FIELD_INFOS_EXTENSION,
+    FIELDS_INDEX_EXTENSION,
+    FIELDS_EXTENSION,
+    TERMS_INDEX_EXTENSION,
+    TERMS_EXTENSION,
+    FREQ_EXTENSION,
+    PROX_EXTENSION,
+    DELETES_EXTENSION,
+    VECTORS_INDEX_EXTENSION,
+    VECTORS_DOCUMENTS_EXTENSION,
+    VECTORS_FIELDS_EXTENSION,
+    GEN_EXTENSION,
+    NORMS_EXTENSION,
+    COMPOUND_FILE_STORE_EXTENSION,
   };
 
   /** File extensions that are added to a compound file
    * (same as above, minus "del", "gen", "cfs"). */
   static final String[] INDEX_EXTENSIONS_IN_COMPOUND_FILE = new String[] {
-      "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx",
-      "tvx", "tvd", "tvf", "nrm" 
+    FIELD_INFOS_EXTENSION,
+    FIELDS_INDEX_EXTENSION,
+    FIELDS_EXTENSION,
+    TERMS_INDEX_EXTENSION,
+    TERMS_EXTENSION,
+    FREQ_EXTENSION,
+    PROX_EXTENSION,
+    VECTORS_INDEX_EXTENSION,
+    VECTORS_DOCUMENTS_EXTENSION,
+    VECTORS_FIELDS_EXTENSION,
+    NORMS_EXTENSION
+  };
+
+  static final String[] STORE_INDEX_EXTENSIONS = new String[] {
+    VECTORS_INDEX_EXTENSION,
+    VECTORS_FIELDS_EXTENSION,
+    VECTORS_DOCUMENTS_EXTENSION,
+    FIELDS_INDEX_EXTENSION,
+    FIELDS_EXTENSION
+  };
+
+  static final String[] NON_STORE_INDEX_EXTENSIONS = new String[] {
+    FIELD_INFOS_EXTENSION,
+    FREQ_EXTENSION,
+    PROX_EXTENSION,
+    TERMS_EXTENSION,
+    TERMS_INDEX_EXTENSION,
+    NORMS_EXTENSION
   };
   
   /** File extensions of old-style index files */
   static final String COMPOUND_EXTENSIONS[] = new String[] {
-    "fnm", "frq", "prx", "fdx", "fdt", "tii", "tis"
+    FIELD_INFOS_EXTENSION,
+    FREQ_EXTENSION,
+    PROX_EXTENSION,
+    FIELDS_INDEX_EXTENSION,
+    FIELDS_EXTENSION,
+    TERMS_INDEX_EXTENSION,
+    TERMS_EXTENSION
   };
   
   /** File extensions for term vector support */
   static final String VECTOR_EXTENSIONS[] = new String[] {
-    "tvx", "tvd", "tvf"
+    VECTORS_INDEX_EXTENSION,
+    VECTORS_DOCUMENTS_EXTENSION,
+    VECTORS_FIELDS_EXTENSION
   };
 
   /**

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexModifier.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/IndexModifier.java?view=diff&rev=553236&r1=553235&r2=553236
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexModifier.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexModifier.java Wed Jul  4 08:16:38 2007
@@ -203,7 +203,8 @@
       indexWriter = new IndexWriter(directory, analyzer, false);
       indexWriter.setInfoStream(infoStream);
       indexWriter.setUseCompoundFile(useCompoundFile);
-      indexWriter.setMaxBufferedDocs(maxBufferedDocs);
+      if (maxBufferedDocs != 0)
+        indexWriter.setMaxBufferedDocs(maxBufferedDocs);
       indexWriter.setMaxFieldLength(maxFieldLength);
       indexWriter.setMergeFactor(mergeFactor);
     }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java?view=diff&rev=553236&r1=553235&r2=553236
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java Wed Jul  4 08:16:38 2007
@@ -783,7 +783,7 @@
         // KeepOnlyLastCommitDeleter:
         IndexFileDeleter deleter =  new IndexFileDeleter(directory,
                                                          deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy,
-                                                         segmentInfos, null);
+                                                         segmentInfos, null, null);
 
         // Checkpoint the state we are about to change, in
         // case we have to roll back:

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java?view=diff&rev=553236&r1=553235&r2=553236
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java Wed Jul  4 08:16:38 2007
@@ -61,14 +61,19 @@
   When finished adding, deleting and updating documents, <a href="#close()"><b>close</b></a> should be called.</p>
 
   <p>These changes are buffered in memory and periodically
-  flushed to the {@link Directory} (during the above method calls).  A flush is triggered when there are
-  enough buffered deletes (see {@link
-  #setMaxBufferedDeleteTerms}) or enough added documents
-  (see {@link #setMaxBufferedDocs}) since the last flush,
-  whichever is sooner.  You can also force a flush by
-  calling {@link #flush}.  When a flush occurs, both pending
-  deletes and added documents are flushed to the index.  A
-  flush may also trigger one or more segment merges.</p>
+  flushed to the {@link Directory} (during the above method
+  calls).  A flush is triggered when there are enough
+  buffered deletes (see {@link #setMaxBufferedDeleteTerms})
+  or enough added documents since the last flush, whichever
+  is sooner.  For the added documents, flushing is triggered
+  either by RAM usage of the documents (see {@link
+  #setRAMBufferSizeMB}) or the number of added documents
+  (this is the default; see {@link #setMaxBufferedDocs}).
+  For best indexing speed you should flush by RAM usage with
+  a large RAM buffer.  You can also force a flush by calling
+  {@link #flush}.  When a flush occurs, both pending deletes
+  and added documents are flushed to the index.  A flush may
+  also trigger one or more segment merges.</p>
 
   <a name="autoCommit"></a>
   <p>The optional <code>autoCommit</code> argument to the
@@ -181,7 +186,20 @@
   /**
    * Default value is 10. Change using {@link #setMaxBufferedDocs(int)}.
    */
+
   public final static int DEFAULT_MAX_BUFFERED_DOCS = 10;
+  /* new merge policy
+  public final static int DEFAULT_MAX_BUFFERED_DOCS = 0;
+  */
+
+  /**
+   * Default value is 0 MB (which means flush only by doc
+   * count).  Change using {@link #setRAMBufferSizeMB}.
+   */
+  public final static double DEFAULT_RAM_BUFFER_SIZE_MB = 0.0;
+  /* new merge policy
+  public final static double DEFAULT_RAM_BUFFER_SIZE_MB = 16.0;
+  */
 
   /**
    * Default value is 1000. Change using {@link #setMaxBufferedDeleteTerms(int)}.
@@ -224,8 +242,7 @@
   private boolean autoCommit = true;              // false if we should commit only on close
 
   SegmentInfos segmentInfos = new SegmentInfos();       // the segments
-  SegmentInfos ramSegmentInfos = new SegmentInfos();    // the segments in ramDirectory
-  private final RAMDirectory ramDirectory = new RAMDirectory(); // for temp segs
+  private DocumentsWriter docWriter;
   private IndexFileDeleter deleter;
 
   private Lock writeLock;
@@ -621,11 +638,14 @@
         rollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
       }
 
+      docWriter = new DocumentsWriter(directory, this);
+      docWriter.setInfoStream(infoStream);
+
       // Default deleter (for backwards compatibility) is
       // KeepOnlyLastCommitDeleter:
       deleter = new IndexFileDeleter(directory,
                                      deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy,
-                                     segmentInfos, infoStream);
+                                     segmentInfos, infoStream, docWriter);
 
     } catch (IOException e) {
       this.writeLock.release();
@@ -683,31 +703,64 @@
     return maxFieldLength;
   }
 
-  /** Determines the minimal number of documents required before the buffered
-   * in-memory documents are merged and a new Segment is created.
-   * Since Documents are merged in a {@link org.apache.lucene.store.RAMDirectory},
-   * large value gives faster indexing.  At the same time, mergeFactor limits
-   * the number of files open in a FSDirectory.
-   *
-   * <p> The default value is 10.
-   *
-   * @throws IllegalArgumentException if maxBufferedDocs is smaller than 2
+  /** Determines the minimal number of documents required
+   * before the buffered in-memory documents are flushed as
+   * a new Segment.  Large values generally give faster
+   * indexing.
+   *
+   * <p>When this is set, the writer will flush every
+   * maxBufferedDocs added documents and never flush by RAM
+   * usage.</p>
+   *
+   * <p> The default value is {@link #DEFAULT_MAX_BUFFERED_DOCS}
+   * (writer flushes by document count).</p>
+   *
+   * @throws IllegalArgumentException if maxBufferedDocs is
+   * smaller than 2
+   * @see #setRAMBufferSizeMB
    */
   public void setMaxBufferedDocs(int maxBufferedDocs) {
     ensureOpen();
     if (maxBufferedDocs < 2)
       throw new IllegalArgumentException("maxBufferedDocs must at least be 2");
-    this.minMergeDocs = maxBufferedDocs;
+    docWriter.setMaxBufferedDocs(maxBufferedDocs);
   }
 
   /**
-   * Returns the number of buffered added documents that will
+   * Returns 0 if this writer is flushing by RAM usage, else
+   * returns the number of buffered added documents that will
    * trigger a flush.
    * @see #setMaxBufferedDocs
    */
   public int getMaxBufferedDocs() {
     ensureOpen();
-    return minMergeDocs;
+    return docWriter.getMaxBufferedDocs();
+  }
+
+  /** Determines the amount of RAM that may be used for
+   * buffering added documents before they are flushed as a
+   * new Segment.  Generally for faster indexing performance
+   * it's best to flush by RAM usage instead of document
+   * count and use as large a RAM buffer as you can.
+   *
+   * <p>When this is set, the writer will flush whenever
+   * buffered documents use this much RAM.</p>
+   *
+   * <p> The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.</p>
+   */
+  public void setRAMBufferSizeMB(double mb) {
+    if (mb <= 0.0)
+      throw new IllegalArgumentException("ramBufferSize should be > 0.0 MB");
+    docWriter.setRAMBufferSizeMB(mb);
+  }
+
+  /**
+   * Returns 0.0 if this writer is flushing by document
+   * count, else returns the value set by {@link
+   * #setRAMBufferSizeMB}.
+   */
+  public double getRAMBufferSizeMB() {
+    return docWriter.getRAMBufferSizeMB();
   }
 
   /**
@@ -788,6 +841,7 @@
   public void setInfoStream(PrintStream infoStream) {
     ensureOpen();
     this.infoStream = infoStream;
+    docWriter.setInfoStream(infoStream);
     deleter.setInfoStream(infoStream);
   }
 
@@ -871,7 +925,7 @@
    */
   public synchronized void close() throws CorruptIndexException, IOException {
     if (!closed) {
-      flushRamSegments();
+      flush(true, true);
 
       if (commitPending) {
         segmentInfos.write(directory);         // now commit changes
@@ -880,18 +934,79 @@
         rollbackSegmentInfos = null;
       }
 
-      ramDirectory.close();
       if (writeLock != null) {
         writeLock.release();                          // release write lock
         writeLock = null;
       }
       closed = true;
+      docWriter = null;
 
       if(closeDir)
         directory.close();
     }
   }
 
+  /** Tells the docWriter to close its currently open shared
+   *  doc stores (stored fields and vectors files). */
+  private void flushDocStores() throws IOException {
+
+    List files = docWriter.files();
+
+    if (files.size() > 0) {
+      String docStoreSegment;
+
+      boolean success = false;
+      try {
+        docStoreSegment = docWriter.closeDocStore();
+        success = true;
+      } finally {
+        if (!success)
+          docWriter.abort();
+      }
+
+      if (useCompoundFile && docStoreSegment != null) {
+        // Now build compound doc store file
+        checkpoint();
+
+        success = false;
+
+        final int numSegments = segmentInfos.size();
+
+        try {
+          CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION);
+          final int size = files.size();
+          for(int i=0;i<size;i++)
+            cfsWriter.addFile((String) files.get(i));
+      
+          // Perform the merge
+          cfsWriter.close();
+
+          for(int i=0;i<numSegments;i++) {
+            SegmentInfo si = segmentInfos.info(i);
+            if (si.getDocStoreOffset() != -1 &&
+                si.getDocStoreSegment().equals(docStoreSegment))
+              si.setDocStoreIsCompoundFile(true);
+          }
+          checkpoint();
+          success = true;
+        } finally {
+          if (!success) {
+            // Rollback to no compound file
+            for(int i=0;i<numSegments;i++) {
+              SegmentInfo si = segmentInfos.info(i);
+              if (si.getDocStoreOffset() != -1 &&
+                  si.getDocStoreSegment().equals(docStoreSegment))
+                si.setDocStoreIsCompoundFile(false);
+            }
+            deleter.refresh();
+          }
+        }
+
+        deleter.checkpoint(segmentInfos, false);
+      }
+    }
+  }
+
   /** Release the write lock, if needed. */
   protected void finalize() throws Throwable {
     try {
@@ -916,11 +1031,10 @@
     return analyzer;
   }
 
-
   /** Returns the number of documents currently in this index. */
   public synchronized int docCount() {
     ensureOpen();
-    int count = ramSegmentInfos.size();
+    int count = docWriter.getNumDocsInRAM();
     for (int i = 0; i < segmentInfos.size(); i++) {
       SegmentInfo si = segmentInfos.info(i);
       count += si.docCount;
@@ -998,22 +1112,8 @@
    */
   public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException {
     ensureOpen();
-    SegmentInfo newSegmentInfo = buildSingleDocSegment(doc, analyzer);
-    synchronized (this) {
-      ramSegmentInfos.addElement(newSegmentInfo);
-      maybeFlushRamSegments();
-    }
-  }
-
-  SegmentInfo buildSingleDocSegment(Document doc, Analyzer analyzer)
-      throws CorruptIndexException, IOException {
-    DocumentWriter dw = new DocumentWriter(ramDirectory, analyzer, this);
-    dw.setInfoStream(infoStream);
-    String segmentName = newRamSegmentName();
-    dw.addDocument(segmentName, doc);
-    SegmentInfo si = new SegmentInfo(segmentName, 1, ramDirectory, false, false);
-    si.setNumFields(dw.getNumFields());
-    return si;
+    if (docWriter.addDocument(doc, analyzer))
+      flush(true, false);
   }
 
   /**
@@ -1025,7 +1125,7 @@
   public synchronized void deleteDocuments(Term term) throws CorruptIndexException, IOException {
     ensureOpen();
     bufferDeleteTerm(term);
-    maybeFlushRamSegments();
+    maybeFlush();
   }
 
   /**
@@ -1041,7 +1141,7 @@
     for (int i = 0; i < terms.length; i++) {
       bufferDeleteTerm(terms[i]);
     }
-    maybeFlushRamSegments();
+    maybeFlush();
   }
 
   /**
@@ -1077,16 +1177,13 @@
   public void updateDocument(Term term, Document doc, Analyzer analyzer)
       throws CorruptIndexException, IOException {
     ensureOpen();
-    SegmentInfo newSegmentInfo = buildSingleDocSegment(doc, analyzer);
     synchronized (this) {
       bufferDeleteTerm(term);
-      ramSegmentInfos.addElement(newSegmentInfo);
-      maybeFlushRamSegments();
     }
-  }
-
-  final synchronized String newRamSegmentName() {
-    return "_ram_" + Integer.toString(ramSegmentInfos.counter++, Character.MAX_RADIX);
+    if (docWriter.addDocument(doc, analyzer))
+      flush(true, false);
+    else
+      maybeFlush();
   }
 
   // for test purpose
@@ -1095,8 +1192,8 @@
   }
 
   // for test purpose
-  final synchronized int getRamSegmentCount(){
-    return ramSegmentInfos.size();
+  final synchronized int getNumBufferedDocuments(){
+    return docWriter.getNumDocsInRAM();
   }
 
   // for test purpose
@@ -1108,7 +1205,7 @@
     }
   }
 
-  final synchronized String newSegmentName() {
+  final String newSegmentName() {
     return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX);
   }
 
@@ -1125,17 +1222,10 @@
    */
   private int mergeFactor = DEFAULT_MERGE_FACTOR;
 
-  /** Determines the minimal number of documents required before the buffered
-   * in-memory documents are merging and a new Segment is created.
-   * Since Documents are merged in a {@link org.apache.lucene.store.RAMDirectory},
-   * large value gives faster indexing.  At the same time, mergeFactor limits
-   * the number of files open in a FSDirectory.
-   *
-   * <p> The default value is {@link #DEFAULT_MAX_BUFFERED_DOCS}.
-
+  /** Determines the amount of RAM (in bytes) the buffered
+   * docs may use before we trigger a flush to the index.
    */
-  private int minMergeDocs = DEFAULT_MAX_BUFFERED_DOCS;
-
+  private double ramBufferSize = DEFAULT_RAM_BUFFER_SIZE_MB*1024F*1024F;
 
   /** Determines the largest number of documents ever merged by addDocument().
    * Small values (e.g., less than 10,000) are best for interactive indexing,
@@ -1151,6 +1241,7 @@
 
    */
   private PrintStream infoStream = null;
+
   private static PrintStream defaultInfoStream = null;
 
   /** Merges all segments together into a single segment,
@@ -1219,16 +1310,16 @@
   */
   public synchronized void optimize() throws CorruptIndexException, IOException {
     ensureOpen();
-    flushRamSegments();
+    flush();
     while (segmentInfos.size() > 1 ||
            (segmentInfos.size() == 1 &&
             (SegmentReader.hasDeletions(segmentInfos.info(0)) ||
              SegmentReader.hasSeparateNorms(segmentInfos.info(0)) ||
              segmentInfos.info(0).dir != directory ||
              (useCompoundFile &&
-              (!SegmentReader.usesCompoundFile(segmentInfos.info(0))))))) {
+              !segmentInfos.info(0).getUseCompoundFile())))) {
       int minSegment = segmentInfos.size() - mergeFactor;
-      mergeSegments(segmentInfos, minSegment < 0 ? 0 : minSegment, segmentInfos.size());
+      mergeSegments(minSegment < 0 ? 0 : minSegment, segmentInfos.size());
     }
   }
 
@@ -1245,7 +1336,7 @@
     localRollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
     localAutoCommit = autoCommit;
     if (localAutoCommit) {
-      flushRamSegments();
+      flush();
       // Turn off auto-commit during our local transaction:
       autoCommit = false;
     } else
@@ -1335,16 +1426,18 @@
       segmentInfos.clear();
       segmentInfos.addAll(rollbackSegmentInfos);
 
+      docWriter.abort();
+
       // Ask deleter to locate unreferenced files & remove
       // them:
       deleter.checkpoint(segmentInfos, false);
       deleter.refresh();
 
-      ramSegmentInfos = new SegmentInfos();
       bufferedDeleteTerms.clear();
       numBufferedDeleteTerms = 0;
 
       commitPending = false;
+      docWriter.abort();
       close();
 
     } else {
@@ -1439,7 +1532,7 @@
         for (int base = start; base < segmentInfos.size(); base++) {
           int end = Math.min(segmentInfos.size(), base+mergeFactor);
           if (end-base > 1) {
-            mergeSegments(segmentInfos, base, end);
+            mergeSegments(base, end);
           }
         }
       }
@@ -1479,7 +1572,7 @@
     // segments in S may not since they could come from multiple indexes.
     // Here is the merge algorithm for addIndexesNoOptimize():
     //
-    // 1 Flush ram segments.
+    // 1 Flush ram.
     // 2 Consider a combined sequence with segments from T followed
     //   by segments from S (same as current addIndexes(Directory[])).
     // 3 Assume the highest level for segments in S is h. Call
@@ -1500,13 +1593,18 @@
     // copy a segment, which may cause doc count to change because deleted
     // docs are garbage collected.
 
-    // 1 flush ram segments
+    // 1 flush ram
 
     ensureOpen();
-    flushRamSegments();
+    flush();
 
     // 2 copy segment infos and find the highest level from dirs
-    int startUpperBound = minMergeDocs;
+    int startUpperBound = docWriter.getMaxBufferedDocs();
+
+    /* new merge policy
+    if (startUpperBound == 0)
+      startUpperBound = 10;
+    */
 
     boolean success = false;
 
@@ -1566,7 +1664,7 @@
 
         // copy those segments from S
         for (int i = segmentCount - numSegmentsToCopy; i < segmentCount; i++) {
-          mergeSegments(segmentInfos, i, i + 1);
+          mergeSegments(i, i + 1);
         }
         if (checkNonDecreasingLevels(segmentCount - numSegmentsToCopy)) {
           success = true;
@@ -1575,7 +1673,7 @@
       }
 
       // invariants do not hold, simply merge those segments
-      mergeSegments(segmentInfos, segmentCount - numTailSegments, segmentCount);
+      mergeSegments(segmentCount - numTailSegments, segmentCount);
 
       // maybe merge segments again if necessary
       if (segmentInfos.info(segmentInfos.size() - 1).docCount > startUpperBound) {
@@ -1637,7 +1735,8 @@
         }
 
         segmentInfos.setSize(0);                      // pop old infos & add new
-        info = new SegmentInfo(mergedName, docCount, directory, false, true);
+        info = new SegmentInfo(mergedName, docCount, directory, false, true,
+                               -1, null, false);
         segmentInfos.addElement(info);
 
         success = true;
@@ -1720,29 +1819,19 @@
    * buffered added documents or buffered deleted terms are
    * large enough.
    */
-  protected final void maybeFlushRamSegments() throws CorruptIndexException, IOException {
-    // A flush is triggered if enough new documents are buffered or
-    // if enough delete terms are buffered
-    if (ramSegmentInfos.size() >= minMergeDocs || numBufferedDeleteTerms >= maxBufferedDeleteTerms) {
-      flushRamSegments();
-    }
+  protected final synchronized void maybeFlush() throws CorruptIndexException, IOException {
+    // We only check for flush due to number of buffered
+    // delete terms, because triggering of a flush due to
+    // too many added documents is handled by
+    // DocumentsWriter
+    if (numBufferedDeleteTerms >= maxBufferedDeleteTerms && docWriter.setFlushPending())
+      flush(true, false);
   }
 
-  /** Expert:  Flushes all RAM-resident segments (buffered documents), then may merge segments. */
-  private final synchronized void flushRamSegments() throws CorruptIndexException, IOException {
-    flushRamSegments(true);
+  public final synchronized void flush() throws CorruptIndexException, IOException {  
+    flush(true, false);
   }
-    
-  /** Expert:  Flushes all RAM-resident segments (buffered documents), 
-   *           then may merge segments if triggerMerge==true. */
-  protected final synchronized void flushRamSegments(boolean triggerMerge) 
-      throws CorruptIndexException, IOException {
-    if (ramSegmentInfos.size() > 0 || bufferedDeleteTerms.size() > 0) {
-      mergeSegments(ramSegmentInfos, 0, ramSegmentInfos.size());
-      if (triggerMerge) maybeMergeSegments(minMergeDocs);
-    }
-  }
-  
+
   /**
    * Flush all in-memory buffered updates (adds and deletes)
    * to the Directory. 
@@ -1751,9 +1840,158 @@
    * @throws CorruptIndexException if the index is corrupt
    * @throws IOException if there is a low-level IO error
    */
-  public final synchronized void flush() throws CorruptIndexException, IOException {
+  public final synchronized void flush(boolean triggerMerge, boolean flushDocStores) throws CorruptIndexException, IOException {
     ensureOpen();
-    flushRamSegments();
+
+    // Make sure no threads are actively adding a document
+    docWriter.pauseAllThreads();
+
+    try {
+
+      SegmentInfo newSegment = null;
+
+      final int numDocs = docWriter.getNumDocsInRAM();
+
+      // Always flush docs if there are any
+      boolean flushDocs = numDocs > 0;
+
+      // With autoCommit=true we always must flush the doc
+      // stores when we flush
+      flushDocStores |= autoCommit;
+      String docStoreSegment = docWriter.getDocStoreSegment();
+      if (docStoreSegment == null)
+        flushDocStores = false;
+
+      // Always flush deletes if there are any delete terms.
+      // TODO: when autoCommit=false we don't have to flush
+      // deletes with every flushed segment; we can save
+      // CPU/IO by buffering longer & flushing deletes only
+      // when they are full or writer is being closed.  We
+      // have to fix the "applyDeletesSelectively" logic to
+      // apply to more than just the last flushed segment
+      boolean flushDeletes = bufferedDeleteTerms.size() > 0;
+
+      if (infoStream != null)
+        infoStream.println("  flush: flushDocs=" + flushDocs +
+                           " flushDeletes=" + flushDeletes +
+                           " flushDocStores=" + flushDocStores +
+                           " numDocs=" + numDocs);
+
+      int docStoreOffset = docWriter.getDocStoreOffset();
+      boolean docStoreIsCompoundFile = false;
+
+      // Check if the doc stores must be separately flushed
+      // because other segments, besides the one we are about
+      // to flush, reference it
+      if (flushDocStores && (!flushDocs || !docWriter.getSegment().equals(docWriter.getDocStoreSegment()))) {
+        // We must separately flush the doc store
+        if (infoStream != null)
+          infoStream.println("  flush shared docStore segment " + docStoreSegment);
+      
+        flushDocStores();
+        flushDocStores = false;
+        docStoreIsCompoundFile = useCompoundFile;
+      }
+
+      String segment = docWriter.getSegment();
+
+      if (flushDocs || flushDeletes) {
+
+        SegmentInfos rollback = null;
+
+        if (flushDeletes)
+          rollback = (SegmentInfos) segmentInfos.clone();
+
+        boolean success = false;
+
+        try {
+          if (flushDocs) {
+
+            if (0 == docStoreOffset && flushDocStores) {
+              // This means we are flushing private doc stores
+              // with this segment, so it will not be shared
+              // with other segments
+              assert docStoreSegment != null;
+              assert docStoreSegment.equals(segment);
+              docStoreOffset = -1;
+              docStoreIsCompoundFile = false;
+              docStoreSegment = null;
+            }
+
+            int flushedDocCount = docWriter.flush(flushDocStores);
+          
+            newSegment = new SegmentInfo(segment,
+                                         flushedDocCount,
+                                         directory, false, true,
+                                         docStoreOffset, docStoreSegment,
+                                         docStoreIsCompoundFile);
+            segmentInfos.addElement(newSegment);
+          }
+
+          if (flushDeletes) {
+            // we should be able to change this so we can
+            // buffer deletes longer and then flush them to
+            // multiple flushed segments, when
+            // autoCommit=false
+            applyDeletes(flushDocs);
+            doAfterFlush();
+          }
+
+          checkpoint();
+          success = true;
+        } finally {
+          if (!success) {
+            if (flushDeletes) {
+              // Fully replace the segmentInfos since flushed
+              // deletes could have changed any of the
+              // SegmentInfo instances:
+              segmentInfos.clear();
+              segmentInfos.addAll(rollback);
+            } else {
+              // Remove segment we added, if any:
+              if (newSegment != null && 
+                  segmentInfos.size() > 0 && 
+                  segmentInfos.info(segmentInfos.size()-1) == newSegment)
+                segmentInfos.remove(segmentInfos.size()-1);
+            }
+            if (flushDocs)
+              docWriter.abort();
+            deleter.checkpoint(segmentInfos, false);
+            deleter.refresh();
+          }
+        }
+
+        deleter.checkpoint(segmentInfos, autoCommit);
+
+        if (flushDocs && useCompoundFile) {
+          success = false;
+          try {
+            docWriter.createCompoundFile(segment);
+            newSegment.setUseCompoundFile(true);
+            checkpoint();
+            success = true;
+          } finally {
+            if (!success) {
+              newSegment.setUseCompoundFile(false);
+              deleter.refresh();
+            }
+          }
+
+          deleter.checkpoint(segmentInfos, autoCommit);
+        }
+
+        /* new merge policy
+        if (0 == docWriter.getMaxBufferedDocs())
+          maybeMergeSegments(mergeFactor * numDocs / 2);
+        else
+          maybeMergeSegments(docWriter.getMaxBufferedDocs());
+        */
+        maybeMergeSegments(docWriter.getMaxBufferedDocs());
+      }
+    } finally {
+      docWriter.clearFlushPending();
+      docWriter.resumeAllThreads();
+    }
   }
 
   /** Expert:  Return the total size of all index files currently cached in memory.
@@ -1761,15 +1999,15 @@
    */
   public final long ramSizeInBytes() {
     ensureOpen();
-    return ramDirectory.sizeInBytes();
+    return docWriter.getRAMUsed();
   }
 
   /** Expert:  Return the number of documents whose segments are currently cached in memory.
-   * Useful when calling flushRamSegments()
+   * Useful when calling flush()
    */
   public final synchronized int numRamDocs() {
     ensureOpen();
-    return ramSegmentInfos.size();
+    return docWriter.getNumDocsInRAM();
   }
   
   /** Incremental segment merger.  */
@@ -1777,6 +2015,10 @@
     long lowerBound = -1;
     long upperBound = startUpperBound;
 
+    /* new merge policy
+    if (upperBound == 0) upperBound = 10;
+    */
+
     while (upperBound < maxMergeDocs) {
       int minSegment = segmentInfos.size();
       int maxSegment = -1;
@@ -1808,7 +2050,7 @@
         while (numSegments >= mergeFactor) {
           // merge the leftmost* mergeFactor segments
 
-          int docCount = mergeSegments(segmentInfos, minSegment, minSegment + mergeFactor);
+          int docCount = mergeSegments(minSegment, minSegment + mergeFactor);
           numSegments -= mergeFactor;
 
           if (docCount > upperBound) {
@@ -1837,39 +2079,108 @@
    * Merges the named range of segments, replacing them in the stack with a
    * single segment.
    */
-  private final int mergeSegments(SegmentInfos sourceSegments, int minSegment, int end)
+
+  private final int mergeSegments(int minSegment, int end)
     throws CorruptIndexException, IOException {
 
-    // We may be called solely because there are deletes
-    // pending, in which case doMerge is false:
-    boolean doMerge = end > 0;
     final String mergedName = newSegmentName();
+    
     SegmentMerger merger = null;
-
-    final List ramSegmentsToDelete = new ArrayList();
-
     SegmentInfo newSegment = null;
 
     int mergedDocCount = 0;
-    boolean anyDeletes = (bufferedDeleteTerms.size() != 0);
 
     // This is try/finally to make sure merger's readers are closed:
     try {
 
-      if (doMerge) {
-        if (infoStream != null) infoStream.print("merging segments");
-        merger = new SegmentMerger(this, mergedName);
-
-        for (int i = minSegment; i < end; i++) {
-          SegmentInfo si = sourceSegments.info(i);
-          if (infoStream != null)
-            infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
-          IndexReader reader = SegmentReader.get(si, MERGE_READ_BUFFER_SIZE); // no need to set deleter (yet)
-          merger.add(reader);
-          if (reader.directory() == this.ramDirectory) {
-            ramSegmentsToDelete.add(si);
-          }
-        }
+      if (infoStream != null) infoStream.print("merging segments");
+
+      // Check whether this merge will allow us to skip
+      // merging the doc stores (stored field & vectors).
+      // This is a very substantial optimization (saves tons
+      // of IO) that can only be applied with
+      // autoCommit=false.
+
+      Directory lastDir = directory;
+      String lastDocStoreSegment = null;
+      boolean mergeDocStores = false;
+      boolean doFlushDocStore = false;
+      int next = -1;
+
+      // Test each segment to be merged
+      for (int i = minSegment; i < end; i++) {
+        SegmentInfo si = segmentInfos.info(i);
+
+        // If it has deletions we must merge the doc stores
+        if (si.hasDeletions())
+          mergeDocStores = true;
+
+        // If it has its own (private) doc stores we must
+        // merge the doc stores
+        if (-1 == si.getDocStoreOffset())
+          mergeDocStores = true;
+
+        // If it has a different doc store segment than
+        // previous segments, we must merge the doc stores
+        String docStoreSegment = si.getDocStoreSegment();
+        if (docStoreSegment == null)
+          mergeDocStores = true;
+        else if (lastDocStoreSegment == null)
+          lastDocStoreSegment = docStoreSegment;
+        else if (!lastDocStoreSegment.equals(docStoreSegment))
+          mergeDocStores = true;
+
+        // Segments' docStoreOffsets must be in-order,
+        // contiguous.  For the default merge policy now
+        // this will always be the case but for an arbitrary
+        // merge policy this may not be the case
+        if (-1 == next)
+          next = si.getDocStoreOffset() + si.docCount;
+        else if (next != si.getDocStoreOffset())
+          mergeDocStores = true;
+        else
+          next = si.getDocStoreOffset() + si.docCount;
+      
+        // If the segment comes from a different directory
+        // we must merge
+        if (lastDir != si.dir)
+          mergeDocStores = true;
+
+        // If the segment references the current "live" doc
+        // store outputs, flush them before merging doc stores
+        if (si.getDocStoreOffset() != -1 && si.getDocStoreSegment().equals(docWriter.getDocStoreSegment()))
+          doFlushDocStore = true;
+      }
+
+      final int docStoreOffset;
+      final String docStoreSegment;
+      final boolean docStoreIsCompoundFile;
+      if (mergeDocStores) {
+        docStoreOffset = -1;
+        docStoreSegment = null;
+        docStoreIsCompoundFile = false;
+      } else {
+        SegmentInfo si = segmentInfos.info(minSegment);        
+        docStoreOffset = si.getDocStoreOffset();
+        docStoreSegment = si.getDocStoreSegment();
+        docStoreIsCompoundFile = si.getDocStoreIsCompoundFile();
+      }
+
+      if (mergeDocStores && doFlushDocStore)
+        // SegmentMerger intends to merge the doc stores
+        // (stored fields, vectors), and at least one of the
+        // segments to be merged refers to the currently
+        // live doc stores.
+        flushDocStores();
+
+      merger = new SegmentMerger(this, mergedName);
+
+      for (int i = minSegment; i < end; i++) {
+        SegmentInfo si = segmentInfos.info(i);
+        if (infoStream != null)
+          infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
+        IndexReader reader = SegmentReader.get(si, MERGE_READ_BUFFER_SIZE, mergeDocStores); // no need to set deleter (yet)
+        merger.add(reader);
       }
 
       SegmentInfos rollback = null;
@@ -1879,65 +2190,32 @@
       // if we hit exception when doing the merge:
       try {
 
-        if (doMerge) {
-          mergedDocCount = merger.merge();
-
-          if (infoStream != null) {
-            infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)");
-          }
+        mergedDocCount = merger.merge(mergeDocStores);
 
-          newSegment = new SegmentInfo(mergedName, mergedDocCount,
-                                       directory, false, true);
+        if (infoStream != null) {
+          infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)");
         }
+
+        newSegment = new SegmentInfo(mergedName, mergedDocCount,
+                                     directory, false, true,
+                                     docStoreOffset,
+                                     docStoreSegment,
+                                     docStoreIsCompoundFile);
         
-        if (sourceSegments != ramSegmentInfos || anyDeletes) {
-          // Now save the SegmentInfo instances that
-          // we are replacing:
-          rollback = (SegmentInfos) segmentInfos.clone();
-        }
+        rollback = (SegmentInfos) segmentInfos.clone();
 
-        if (doMerge) {
-          if (sourceSegments == ramSegmentInfos) {
-            segmentInfos.addElement(newSegment);
-          } else {
-            for (int i = end-1; i > minSegment; i--)     // remove old infos & add new
-              sourceSegments.remove(i);
+        for (int i = end-1; i > minSegment; i--)     // remove old infos & add new
+          segmentInfos.remove(i);
 
-            segmentInfos.set(minSegment, newSegment);
-          }
-        }
+        segmentInfos.set(minSegment, newSegment);
 
-        if (sourceSegments == ramSegmentInfos) {
-          maybeApplyDeletes(doMerge);
-          doAfterFlush();
-        }
-        
         checkpoint();
 
         success = true;
 
       } finally {
-
-        if (success) {
-          // The non-ram-segments case is already committed
-          // (above), so all the remains for ram segments case
-          // is to clear the ram segments:
-          if (sourceSegments == ramSegmentInfos) {
-            ramSegmentInfos.removeAllElements();
-          }
-        } else {
-
-          // Must rollback so our state matches index:
-          if (sourceSegments == ramSegmentInfos && !anyDeletes) {
-            // Simple case: newSegment may or may not have
-            // been added to the end of our segment infos,
-            // so just check & remove if so:
-            if (newSegment != null && 
-                segmentInfos.size() > 0 && 
-                segmentInfos.info(segmentInfos.size()-1) == newSegment) {
-              segmentInfos.remove(segmentInfos.size()-1);
-            }
-          } else if (rollback != null) {
+        if (!success) {
+          if (rollback != null) {
             // Rollback the individual SegmentInfo
             // instances, but keep original SegmentInfos
             // instance (so we don't try to write again the
@@ -1952,16 +2230,13 @@
       }
     } finally {
       // close readers before we attempt to delete now-obsolete segments
-      if (doMerge) merger.closeReaders();
+      merger.closeReaders();
     }
 
-    // Delete the RAM segments
-    deleter.deleteDirect(ramDirectory, ramSegmentsToDelete);
-
     // Give deleter a chance to remove files now.
     deleter.checkpoint(segmentInfos, autoCommit);
 
-    if (useCompoundFile && doMerge) {
+    if (useCompoundFile) {
 
       boolean success = false;
 
@@ -1988,19 +2263,23 @@
   }
 
   // Called during flush to apply any buffered deletes.  If
-  // doMerge is true then a new segment was just created and
-  // flushed from the ram segments.
-  private final void maybeApplyDeletes(boolean doMerge) throws CorruptIndexException, IOException {
+  // flushedNewSegment is true then a new segment was just
+  // created and flushed from the docs buffered in RAM, so we will
+  // selectively apply the deletes to that new segment.
+  private final void applyDeletes(boolean flushedNewSegment) throws CorruptIndexException, IOException {
 
     if (bufferedDeleteTerms.size() > 0) {
       if (infoStream != null)
         infoStream.println("flush " + numBufferedDeleteTerms + " buffered deleted terms on "
                            + segmentInfos.size() + " segments.");
 
-      if (doMerge) {
+      if (flushedNewSegment) {
         IndexReader reader = null;
         try {
-          reader = SegmentReader.get(segmentInfos.info(segmentInfos.size() - 1));
+          // Open readers w/o opening the stored fields /
+          // vectors because these files may still be held
+          // open for writing by docWriter
+          reader = SegmentReader.get(segmentInfos.info(segmentInfos.size() - 1), false);
 
           // Apply delete terms to the segment just flushed from ram
           // apply appropriately so that a delete term is only applied to
@@ -2018,14 +2297,14 @@
       }
 
       int infosEnd = segmentInfos.size();
-      if (doMerge) {
+      if (flushedNewSegment) {
         infosEnd--;
       }
 
       for (int i = 0; i < infosEnd; i++) {
         IndexReader reader = null;
         try {
-          reader = SegmentReader.get(segmentInfos.info(i));
+          reader = SegmentReader.get(segmentInfos.info(i), false);
 
           // Apply delete terms to disk segments
           // except the one just flushed from ram.
@@ -2049,7 +2328,12 @@
 
   private final boolean checkNonDecreasingLevels(int start) {
     int lowerBound = -1;
-    int upperBound = minMergeDocs;
+    int upperBound = docWriter.getMaxBufferedDocs();
+
+    /* new merge policy
+    if (upperBound == 0)
+      upperBound = 10;
+    */
 
     for (int i = segmentInfos.size() - 1; i >= start; i--) {
       int docCount = segmentInfos.info(i).docCount;
@@ -2098,10 +2382,11 @@
   // well as the disk segments.
   private void bufferDeleteTerm(Term term) {
     Num num = (Num) bufferedDeleteTerms.get(term);
+    int numDoc = docWriter.getNumDocsInRAM();
     if (num == null) {
-      bufferedDeleteTerms.put(term, new Num(ramSegmentInfos.size()));
+      bufferedDeleteTerms.put(term, new Num(numDoc));
     } else {
-      num.setNum(ramSegmentInfos.size());
+      num.setNum(numDoc);
     }
     numBufferedDeleteTerms++;
   }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfo.java?view=diff&rev=553236&r1=553235&r2=553236
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfo.java Wed Jul  4 08:16:38 2007
@@ -65,6 +65,12 @@
   private List files;                             // cached list of files that this segment uses
                                                   // in the Directory
 
+  private int docStoreOffset;                     // if this segment shares stored fields & vectors, this
+                                                  // offset is where in that file this segment's docs begin
+  private String docStoreSegment;                 // name used to derive fields/vectors file we share with
+                                                  // other segments
+  private boolean docStoreIsCompoundFile;         // whether doc store files are stored in compound file (*.cfx)
+
   public SegmentInfo(String name, int docCount, Directory dir) {
     this.name = name;
     this.docCount = docCount;
@@ -73,13 +79,25 @@
     isCompoundFile = CHECK_DIR;
     preLockless = true;
     hasSingleNormFile = false;
+    docStoreOffset = -1;
+    docStoreSegment = name;
+    docStoreIsCompoundFile = false;
   }
 
   public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile) { 
+    this(name, docCount, dir, isCompoundFile, hasSingleNormFile, -1, null, false);
+  }
+
+  public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile,
+                     int docStoreOffset, String docStoreSegment, boolean docStoreIsCompoundFile) { 
     this(name, docCount, dir);
     this.isCompoundFile = (byte) (isCompoundFile ? YES : NO);
     this.hasSingleNormFile = hasSingleNormFile;
     preLockless = false;
+    this.docStoreOffset = docStoreOffset;
+    this.docStoreSegment = docStoreSegment;
+    this.docStoreIsCompoundFile = docStoreIsCompoundFile;
+    assert docStoreOffset == -1 || docStoreSegment != null;
   }
 
   /**
@@ -92,6 +110,8 @@
     dir = src.dir;
     preLockless = src.preLockless;
     delGen = src.delGen;
+    docStoreOffset = src.docStoreOffset;
+    docStoreIsCompoundFile = src.docStoreIsCompoundFile;
     if (src.normGen == null) {
       normGen = null;
     } else {
@@ -116,6 +136,20 @@
     docCount = input.readInt();
     if (format <= SegmentInfos.FORMAT_LOCKLESS) {
       delGen = input.readLong();
+      if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE) {
+        docStoreOffset = input.readInt();
+        if (docStoreOffset != -1) {
+          docStoreSegment = input.readString();
+          docStoreIsCompoundFile = (1 == input.readByte());
+        } else {
+          docStoreSegment = name;
+          docStoreIsCompoundFile = false;
+        }
+      } else {
+        docStoreOffset = -1;
+        docStoreSegment = name;
+        docStoreIsCompoundFile = false;
+      }
       if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) {
         hasSingleNormFile = (1 == input.readByte());
       } else {
@@ -138,6 +172,9 @@
       isCompoundFile = CHECK_DIR;
       preLockless = true;
       hasSingleNormFile = false;
+      docStoreOffset = -1;
+      docStoreIsCompoundFile = false;
+      docStoreSegment = null;
     }
   }
   
@@ -368,6 +405,28 @@
       return dir.fileExists(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
     }
   }
+
+  int getDocStoreOffset() {
+    return docStoreOffset;
+  }
+  
+  boolean getDocStoreIsCompoundFile() {
+    return docStoreIsCompoundFile;
+  }
+  
+  void setDocStoreIsCompoundFile(boolean v) {
+    docStoreIsCompoundFile = v;
+    files = null;
+  }
+  
+  String getDocStoreSegment() {
+    return docStoreSegment;
+  }
+  
+  void setDocStoreOffset(int offset) {
+    docStoreOffset = offset;
+    files = null;
+  }
   
   /**
    * Save this segment's info.
@@ -377,6 +436,12 @@
     output.writeString(name);
     output.writeInt(docCount);
     output.writeLong(delGen);
+    output.writeInt(docStoreOffset);
+    if (docStoreOffset != -1) {
+      output.writeString(docStoreSegment);
+      output.writeByte((byte) (docStoreIsCompoundFile ? 1:0));
+    }
+
     output.writeByte((byte) (hasSingleNormFile ? 1:0));
     if (normGen == null) {
       output.writeInt(NO);
@@ -389,6 +454,11 @@
     output.writeByte(isCompoundFile);
   }
 
+  private void addIfExists(List files, String fileName) throws IOException {
+    if (dir.fileExists(fileName))
+      files.add(fileName);
+  }
+
   /*
    * Return all files referenced by this SegmentInfo.  The
    * returns List is a locally cached List so you should not
@@ -409,13 +479,28 @@
     if (useCompoundFile) {
       files.add(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
     } else {
-      for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS_IN_COMPOUND_FILE.length; i++) {
-        String ext = IndexFileNames.INDEX_EXTENSIONS_IN_COMPOUND_FILE[i];
-        String fileName = name + "." + ext;
-        if (dir.fileExists(fileName)) {
-          files.add(fileName);
-        }
+      final String[] exts = IndexFileNames.NON_STORE_INDEX_EXTENSIONS;
+      for(int i=0;i<exts.length;i++)
+        addIfExists(files, name + "." + exts[i]);
+    }
+
+    if (docStoreOffset != -1) {
+      // We are sharing doc stores (stored fields, term
+      // vectors) with other segments
+      assert docStoreSegment != null;
+      if (docStoreIsCompoundFile) {
+        files.add(docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION);
+      } else {
+        final String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
+        for(int i=0;i<exts.length;i++)
+          addIfExists(files, docStoreSegment + "." + exts[i]);
       }
+    } else if (!useCompoundFile) {
+      // We are not sharing, and, these files were not
+      // included in the compound file
+      final String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
+      for(int i=0;i<exts.length;i++)
+        addIfExists(files, name + "." + exts[i]);
     }
 
     String delFileName = IndexFileNames.fileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen);

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfos.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfos.java?view=diff&rev=553236&r1=553235&r2=553236
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfos.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfos.java Wed Jul  4 08:16:38 2007
@@ -51,8 +51,12 @@
    */
   public static final int FORMAT_SINGLE_NORM_FILE = -3;
 
+  /** This format allows multiple segments to share a single
+   * set of term vectors and stored fields files. */
+  public static final int FORMAT_SHARED_DOC_STORE = -4;
+
   /* This must always point to the most recent file format. */
-  private static final int CURRENT_FORMAT = FORMAT_SINGLE_NORM_FILE;
+  private static final int CURRENT_FORMAT = FORMAT_SHARED_DOC_STORE;
   
   public int counter = 0;    // used to name new segments
   /**

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java?view=diff&rev=553236&r1=553235&r2=553236
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java Wed Jul  4 08:16:38 2007
@@ -52,6 +52,12 @@
   
   private int mergedDocs;
 
+  // Whether we should merge doc stores (stored fields and
+  // vectors files).  When all segments we are merging
+  // already share the same doc store files, we don't need
+  // to merge the doc stores.
+  private boolean mergeDocStores;
+
   /** This ctor used only by test code.
    * 
    * @param dir The Directory to merge the other segments into
@@ -92,18 +98,32 @@
    * @throws IOException if there is a low-level IO error
    */
   final int merge() throws CorruptIndexException, IOException {
-    int value;
-    
+    return merge(true);
+  }
+
+  /**
+   * Merges the readers specified by the {@link #add} method
+   * into the directory passed to the constructor.
+   * @param mergeDocStores if false, we will merge neither the
+   * stored fields nor the vectors files
+   * @return The number of documents that were merged
+   * @throws CorruptIndexException if the index is corrupt
+   * @throws IOException if there is a low-level IO error
+   */
+  final int merge(boolean mergeDocStores) throws CorruptIndexException, IOException {
+
+    this.mergeDocStores = mergeDocStores;
+
     mergedDocs = mergeFields();
     mergeTerms();
     mergeNorms();
 
-    if (fieldInfos.hasVectors())
+    if (mergeDocStores && fieldInfos.hasVectors())
       mergeVectors();
 
     return mergedDocs;
   }
-  
+
   /**
    * close all IndexReaders that have been added.
    * Should not be called before merge().
@@ -126,7 +146,10 @@
     
     // Basic files
     for (int i = 0; i < IndexFileNames.COMPOUND_EXTENSIONS.length; i++) {
-      files.add(segment + "." + IndexFileNames.COMPOUND_EXTENSIONS[i]);
+      String ext = IndexFileNames.COMPOUND_EXTENSIONS[i];
+      if (mergeDocStores || (!ext.equals(IndexFileNames.FIELDS_EXTENSION) &&
+                            !ext.equals(IndexFileNames.FIELDS_INDEX_EXTENSION)))
+        files.add(segment + "." + ext);
     }
 
     // Fieldable norm files
@@ -139,7 +162,7 @@
     }
 
     // Vector files
-    if (fieldInfos.hasVectors()) {
+    if (fieldInfos.hasVectors() && mergeDocStores) {
       for (int i = 0; i < IndexFileNames.VECTOR_EXTENSIONS.length; i++) {
         files.add(segment + "." + IndexFileNames.VECTOR_EXTENSIONS[i]);
       }
@@ -173,7 +196,20 @@
    * @throws IOException if there is a low-level IO error
    */
   private final int mergeFields() throws CorruptIndexException, IOException {
-    fieldInfos = new FieldInfos();		  // merge field names
+
+    if (!mergeDocStores) {
+      // When we are not merging the doc stores, that means
+      // all segments were written as part of a single
+      // autoCommit=false IndexWriter session, so their field
+      // name -> number mappings are the same.  So, we start
+      // with the fieldInfos of the last segment in this
+      // case, to keep that numbering.
+      final SegmentReader sr = (SegmentReader) readers.elementAt(readers.size()-1);
+      fieldInfos = (FieldInfos) sr.fieldInfos.clone();
+    } else {
+      fieldInfos = new FieldInfos();		  // merge field names
+    }
+
     int docCount = 0;
     for (int i = 0; i < readers.size(); i++) {
       IndexReader reader = (IndexReader) readers.elementAt(i);
@@ -187,30 +223,40 @@
     }
     fieldInfos.write(directory, segment + ".fnm");
 
-    FieldsWriter fieldsWriter = // merge field values
-            new FieldsWriter(directory, segment, fieldInfos);
-    
-    // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
-    // in  merge mode, we use this FieldSelector
-    FieldSelector fieldSelectorMerge = new FieldSelector() {
-      public FieldSelectorResult accept(String fieldName) {
-        return FieldSelectorResult.LOAD_FOR_MERGE;
-      }        
-    };
+    if (mergeDocStores) {
+
+      FieldsWriter fieldsWriter = // merge field values
+        new FieldsWriter(directory, segment, fieldInfos);
     
-    try {
-      for (int i = 0; i < readers.size(); i++) {
-        IndexReader reader = (IndexReader) readers.elementAt(i);
-        int maxDoc = reader.maxDoc();
-        for (int j = 0; j < maxDoc; j++)
-          if (!reader.isDeleted(j)) {               // skip deleted docs
-            fieldsWriter.addDocument(reader.document(j, fieldSelectorMerge));
-            docCount++;
-          }
+      // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
+      // in  merge mode, we use this FieldSelector
+      FieldSelector fieldSelectorMerge = new FieldSelector() {
+          public FieldSelectorResult accept(String fieldName) {
+            return FieldSelectorResult.LOAD_FOR_MERGE;
+          }        
+        };
+
+      try {
+        for (int i = 0; i < readers.size(); i++) {
+          IndexReader reader = (IndexReader) readers.elementAt(i);
+          int maxDoc = reader.maxDoc();
+          for (int j = 0; j < maxDoc; j++)
+            if (!reader.isDeleted(j)) {               // skip deleted docs
+              fieldsWriter.addDocument(reader.document(j, fieldSelectorMerge));
+              docCount++;
+            }
+        }
+      } finally {
+        fieldsWriter.close();
       }
-    } finally {
-      fieldsWriter.close();
-    }
+
+    } else
+      // If we are skipping the doc stores, that means there
+      // are no deletions in any of these segments, so we
+      // just sum numDocs() of each segment to get total docCount
+      for (int i = 0; i < readers.size(); i++)
+        docCount += ((IndexReader) readers.elementAt(i)).numDocs();
+
     return docCount;
   }
 
@@ -355,6 +401,7 @@
     for (int i = 0; i < n; i++) {
       SegmentMergeInfo smi = smis[i];
       TermPositions postings = smi.getPositions();
+      assert postings != null;
       int base = smi.base;
       int[] docMap = smi.getDocMap();
       postings.seek(smi.termEnum);

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java?view=diff&rev=553236&r1=553235&r2=553236
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java Wed Jul  4 08:16:38 2007
@@ -60,6 +60,7 @@
 
   // Compound File Reader when based on a compound file segment
   CompoundFileReader cfsReader = null;
+  CompoundFileReader storeCFSReader = null;
 
   private class Norm {
     public Norm(IndexInput in, int number, long normSeek)
@@ -128,7 +129,15 @@
    * @throws IOException if there is a low-level IO error
    */
   public static SegmentReader get(SegmentInfo si) throws CorruptIndexException, IOException {
-    return get(si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE);
+    return get(si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE, true);
+  }
+
+  /**
+   * @throws CorruptIndexException if the index is corrupt
+   * @throws IOException if there is a low-level IO error
+   */
+  public static SegmentReader get(SegmentInfo si, boolean doOpenStores) throws CorruptIndexException, IOException {
+    return get(si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE, doOpenStores);
   }
 
   /**
@@ -136,7 +145,15 @@
    * @throws IOException if there is a low-level IO error
    */
   public static SegmentReader get(SegmentInfo si, int readBufferSize) throws CorruptIndexException, IOException {
-    return get(si.dir, si, null, false, false, readBufferSize);
+    return get(si.dir, si, null, false, false, readBufferSize, true);
+  }
+
+  /**
+   * @throws CorruptIndexException if the index is corrupt
+   * @throws IOException if there is a low-level IO error
+   */
+  public static SegmentReader get(SegmentInfo si, int readBufferSize, boolean doOpenStores) throws CorruptIndexException, IOException {
+    return get(si.dir, si, null, false, false, readBufferSize, doOpenStores);
   }
 
   /**
@@ -145,7 +162,7 @@
    */
   public static SegmentReader get(SegmentInfos sis, SegmentInfo si,
                                   boolean closeDir) throws CorruptIndexException, IOException {
-    return get(si.dir, si, sis, closeDir, true, BufferedIndexInput.BUFFER_SIZE);
+    return get(si.dir, si, sis, closeDir, true, BufferedIndexInput.BUFFER_SIZE, true);
   }
 
   /**
@@ -157,6 +174,19 @@
                                   boolean closeDir, boolean ownDir,
                                   int readBufferSize)
     throws CorruptIndexException, IOException {
+    return get(dir, si, sis, closeDir, ownDir, readBufferSize, true);
+  }
+
+  /**
+   * @throws CorruptIndexException if the index is corrupt
+   * @throws IOException if there is a low-level IO error
+   */
+  public static SegmentReader get(Directory dir, SegmentInfo si,
+                                  SegmentInfos sis,
+                                  boolean closeDir, boolean ownDir,
+                                  int readBufferSize,
+                                  boolean doOpenStores)
+    throws CorruptIndexException, IOException {
     SegmentReader instance;
     try {
       instance = (SegmentReader)IMPL.newInstance();
@@ -164,11 +194,11 @@
       throw new RuntimeException("cannot load SegmentReader class: " + e, e);
     }
     instance.init(dir, sis, closeDir, ownDir);
-    instance.initialize(si, readBufferSize);
+    instance.initialize(si, readBufferSize, doOpenStores);
     return instance;
   }
 
-  private void initialize(SegmentInfo si, int readBufferSize) throws CorruptIndexException, IOException {
+  private void initialize(SegmentInfo si, int readBufferSize, boolean doOpenStores) throws CorruptIndexException, IOException {
     segment = si.name;
     this.si = si;
 
@@ -178,17 +208,45 @@
       // Use compound file directory for some files, if it exists
       Directory cfsDir = directory();
       if (si.getUseCompoundFile()) {
-        cfsReader = new CompoundFileReader(directory(), segment + ".cfs", readBufferSize);
+        cfsReader = new CompoundFileReader(directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
         cfsDir = cfsReader;
       }
 
+      final Directory storeDir;
+
+      if (doOpenStores) {
+        if (si.getDocStoreOffset() != -1) {
+          if (si.getDocStoreIsCompoundFile()) {
+            storeCFSReader = new CompoundFileReader(directory(), si.getDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
+            storeDir = storeCFSReader;
+          } else {
+            storeDir = directory();
+          }
+        } else {
+          storeDir = cfsDir;
+        }
+      } else
+        storeDir = null;
+
       // No compound file exists - use the multi-file format
       fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
-      fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos, readBufferSize);
 
-      // Verify two sources of "maxDoc" agree:
-      if (fieldsReader.size() != si.docCount) {
-        throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.size() + " but segmentInfo shows " + si.docCount);
+      final String fieldsSegment;
+      final Directory dir;
+
+      if (si.getDocStoreOffset() != -1)
+        fieldsSegment = si.getDocStoreSegment();
+      else
+        fieldsSegment = segment;
+
+      if (doOpenStores) {
+        fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize,
+                                        si.getDocStoreOffset(), si.docCount);
+
+        // Verify two sources of "maxDoc" agree:
+        if (si.getDocStoreOffset() == -1 && fieldsReader.size() != si.docCount) {
+          throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.size() + " but segmentInfo shows " + si.docCount);
+        }
       }
 
       tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);
@@ -209,8 +267,13 @@
       proxStream = cfsDir.openInput(segment + ".prx", readBufferSize);
       openNorms(cfsDir, readBufferSize);
 
-      if (fieldInfos.hasVectors()) { // open term vector files only as needed
-        termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos, readBufferSize);
+      if (doOpenStores && fieldInfos.hasVectors()) { // open term vector files only as needed
+        final String vectorsSegment;
+        if (si.getDocStoreOffset() != -1)
+          vectorsSegment = si.getDocStoreSegment();
+        else
+          vectorsSegment = segment;
+        termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.getDocStoreOffset(), si.docCount);
       }
       success = true;
     } finally {
@@ -273,6 +336,9 @@
 
     if (cfsReader != null)
       cfsReader.close();
+
+    if (storeCFSReader != null)
+      storeCFSReader.close();
   }
 
   static boolean hasDeletions(SegmentInfo si) throws IOException {

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java?view=diff&rev=553236&r1=553235&r2=553236
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java Wed Jul  4 08:16:38 2007
@@ -33,6 +33,10 @@
   private IndexInput tvd;
   private IndexInput tvf;
   private int size;
+
+  // The docID offset where our docs begin in the index
+  // file.  This will be 0 if we have our own private file.
+  private int docStoreOffset;
   
   private int tvdFormat;
   private int tvfFormat;
@@ -44,6 +48,11 @@
 
   TermVectorsReader(Directory d, String segment, FieldInfos fieldInfos, int readBufferSize)
     throws CorruptIndexException, IOException {
+    this(d, segment, fieldInfos, BufferedIndexInput.BUFFER_SIZE, -1, 0);
+  }
+    
+  TermVectorsReader(Directory d, String segment, FieldInfos fieldInfos, int readBufferSize, int docStoreOffset, int size)
+    throws CorruptIndexException, IOException {
     if (d.fileExists(segment + TermVectorsWriter.TVX_EXTENSION)) {
       tvx = d.openInput(segment + TermVectorsWriter.TVX_EXTENSION, readBufferSize);
       checkValidFormat(tvx);
@@ -51,7 +60,16 @@
       tvdFormat = checkValidFormat(tvd);
       tvf = d.openInput(segment + TermVectorsWriter.TVF_EXTENSION, readBufferSize);
       tvfFormat = checkValidFormat(tvf);
-      size = (int) tvx.length() / 8;
+      if (-1 == docStoreOffset) {
+        this.docStoreOffset = 0;
+        this.size = (int) (tvx.length() / 8);
+      } else {
+        this.docStoreOffset = docStoreOffset;
+        this.size = size;
+        // Verify the file is long enough to hold all of our
+        // docs
+        assert ((int) (tvx.length()/8)) >= size + docStoreOffset;
+      }
     }
 
     this.fieldInfos = fieldInfos;
@@ -102,7 +120,7 @@
       //We don't need to do this in other seeks because we already have the
       // file pointer
       //that was written in another file
-      tvx.seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
+      tvx.seek(((docNum + docStoreOffset) * 8L) + TermVectorsWriter.FORMAT_SIZE);
       //System.out.println("TVX Pointer: " + tvx.getFilePointer());
       long position = tvx.readLong();
 
@@ -154,7 +172,7 @@
     // Check if no term vectors are available for this segment at all
     if (tvx != null) {
       //We need to offset by
-      tvx.seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
+      tvx.seek(((docNum + docStoreOffset) * 8L) + TermVectorsWriter.FORMAT_SIZE);
       long position = tvx.readLong();
 
       tvd.seek(position);

Modified: lucene/java/trunk/src/java/org/apache/lucene/store/IndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/store/IndexOutput.java?view=diff&rev=553236&r1=553235&r2=553236
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/store/IndexOutput.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/store/IndexOutput.java Wed Jul  4 08:16:38 2007
@@ -125,6 +125,31 @@
     }
   }
 
+  /** Writes a sequence of UTF-8 encoded characters from a char[].
+   * @param s the source of the characters
+   * @param start the first character in the sequence
+   * @param length the number of characters in the sequence
+   * @see IndexInput#readChars(char[],int,int)
+   */
+  public void writeChars(char[] s, int start, int length)
+    throws IOException {
+    final int end = start + length;
+    for (int i = start; i < end; i++) {
+      final int code = (int)s[i];
+      if (code >= 0x01 && code <= 0x7F)
+	writeByte((byte)code);
+      else if (((code >= 0x80) && (code <= 0x7FF)) || code == 0) {
+	writeByte((byte)(0xC0 | (code >> 6)));
+	writeByte((byte)(0x80 | (code & 0x3F)));
+      } else {
+	writeByte((byte)(0xE0 | (code >>> 12)));
+	writeByte((byte)(0x80 | ((code >> 6) & 0x3F)));
+	writeByte((byte)(0x80 | (code & 0x3F)));
+      }
+    }
+  }
+
+
   /** Forces any buffered output to be written. */
   public abstract void flush() throws IOException;
 

Modified: lucene/java/trunk/src/site/src/documentation/content/xdocs/fileformats.xml
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/site/src/documentation/content/xdocs/fileformats.xml?view=diff&rev=553236&r1=553235&r2=553236
==============================================================================
--- lucene/java/trunk/src/site/src/documentation/content/xdocs/fileformats.xml (original)
+++ lucene/java/trunk/src/site/src/documentation/content/xdocs/fileformats.xml Wed Jul  4 08:16:38 2007
@@ -60,6 +60,15 @@
                 Lucene will not be able to read the index.
             </p>
 
+            <p>
+                In version 2.3, the file format was changed to allow
+		segments to share a single set of doc store (vectors &amp;
+		stored fields) files.  This allows for faster indexing
+		in certain cases.  The change is fully backwards
+		compatible (in the same way as the lock-less commits
+		change in 2.1).
+            </p>
+
         </section>
 
         <section id="Definitions"><title>Definitions</title>
@@ -809,9 +818,15 @@
                     NormGen<sup>NumField</sup>,
                     IsCompoundFile&gt;<sup>SegCount</sup>
                 </p>
+                <p>
+                    <b>2.3 and above:</b>
+                    Segments --&gt; Format, Version, NameCounter, SegCount, &lt;SegName, SegSize, DelGen, DocStoreOffset, [DocStoreSegment, DocStoreIsCompoundFile], HasSingleNormFile, NumField,
+                    NormGen<sup>NumField</sup>,
+                    IsCompoundFile&gt;<sup>SegCount</sup>
+                </p>
 
                 <p>
-                    Format, NameCounter, SegCount, SegSize, NumField --&gt; Int32
+                    Format, NameCounter, SegCount, SegSize, NumField, DocStoreOffset --&gt; Int32
                 </p>
 
                 <p>
@@ -819,11 +834,11 @@
                 </p>
 
                 <p>
-                    SegName --&gt; String
+                    SegName, DocStoreSegment --&gt; String
                 </p>
 
                 <p>
-                    IsCompoundFile, HasSingleNormFile --&gt; Int8
+                    IsCompoundFile, HasSingleNormFile, DocStoreIsCompoundFile --&gt; Int8
                 </p>
 
                 <p>
@@ -889,6 +904,29 @@
                     "Normalization Factors" below for details.
                 </p>
 
+                <p>
+		    DocStoreOffset, DocStoreSegment,
+                    DocStoreIsCompoundFile: If DocStoreOffset is -1,
+                    this segment has its own doc store (stored field
+                    values and term vectors) files and DocStoreSegment
+                    and DocStoreIsCompoundFile are not stored.  In
+                    this case all files for stored field values
+                    (<tt>*.fdt</tt> and <tt>*.fdx</tt>) and term
+                    vectors (<tt>*.tvf</tt>, <tt>*.tvd</tt> and
+                    <tt>*.tvx</tt>) will be stored with this segment.
+                    Otherwise, DocStoreSegment is the name of the
+                    segment that has the shared doc store files;
+                    DocStoreIsCompoundFile is 1 if that segment is
+                    stored in compound file format (as a <tt>.cfx</tt>
+                    file); and DocStoreOffset is the starting document
+                    in the shared doc store files where this segment's
+                    documents begin.  In this case, this segment does
+                    not store its own doc store files but instead
+                    shares a single set of these files with other
+                    segments.
+                </p>
+		
+
             </section>
 
             <section id="Lock File"><title>Lock File</title>
@@ -946,6 +984,14 @@
 
                 <p>FileData --&gt; raw file data</p>
                 <p>The raw file data is the data from the individual files named above.</p>
+
+		<p>Starting with Lucene 2.3, doc store files (stored
+		field values and term vectors) can be shared in a
+		single set of files for more than one segment.  When
+		the compound file format is enabled, these shared files will be
+		added into a single compound file (same format as
+		above) but with the extension <tt>.cfx</tt>.
+		</p>
 
             </section>
 



Mime
View raw message