lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r707836 - /lucene/java/trunk/src/java/org/apache/lucene/index/
Date Sat, 25 Oct 2008 10:40:01 GMT
Author: mikemccand
Date: Sat Oct 25 03:40:00 2008
New Revision: 707836

URL: http://svn.apache.org/viewvc?rev=707836&view=rev
Log:
LUCENE-1426: next steps towards flexible indexing: use the same API when writing a new segment

Added:
    lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsDocsConsumer.java   (with props)
    lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsDocsWriter.java   (with props)
    lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsFieldsConsumer.java   (with props)
    lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsFieldsWriter.java   (with props)
    lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsPositionsConsumer.java   (with props)
    lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsPositionsWriter.java   (with props)
    lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsTermsConsumer.java   (with props)
    lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsTermsWriter.java   (with props)
    lucene/java/trunk/src/java/org/apache/lucene/index/SegmentWriteState.java   (with props)
Modified:
    lucene/java/trunk/src/java/org/apache/lucene/index/DefaultSkipListWriter.java
    lucene/java/trunk/src/java/org/apache/lucene/index/DocConsumer.java
    lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldConsumer.java
    lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldConsumers.java
    lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessor.java
    lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
    lucene/java/trunk/src/java/org/apache/lucene/index/DocInverter.java
    lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java
    lucene/java/trunk/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
    lucene/java/trunk/src/java/org/apache/lucene/index/IndexFileNames.java
    lucene/java/trunk/src/java/org/apache/lucene/index/InvertedDocConsumer.java
    lucene/java/trunk/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java
    lucene/java/trunk/src/java/org/apache/lucene/index/NormsWriter.java
    lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java
    lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriter.java
    lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java
    lucene/java/trunk/src/java/org/apache/lucene/index/TermsHash.java
    lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumer.java

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DefaultSkipListWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DefaultSkipListWriter.java?rev=707836&r1=707835&r2=707836&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DefaultSkipListWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DefaultSkipListWriter.java Sat Oct 25 03:40:00 2008
@@ -54,6 +54,14 @@
     lastSkipProxPointer = new long[numberOfSkipLevels];
   }
 
+  void setFreqOutput(IndexOutput freqOutput) {
+    this.freqOutput = freqOutput;
+  }
+
+  void setProxOutput(IndexOutput proxOutput) {
+    this.proxOutput = proxOutput;
+  }
+
   /**
    * Sets the values for the current skip data. 
    */

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocConsumer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DocConsumer.java?rev=707836&r1=707835&r2=707836&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocConsumer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocConsumer.java Sat Oct 25 03:40:00 2008
@@ -22,8 +22,8 @@
 
 abstract class DocConsumer {
   abstract DocConsumerPerThread addThread(DocumentsWriterThreadState perThread) throws IOException;
-  abstract void flush(final Collection threads, final DocumentsWriter.FlushState state) throws IOException;
-  abstract void closeDocStore(final DocumentsWriter.FlushState state) throws IOException;
+  abstract void flush(final Collection threads, final SegmentWriteState state) throws IOException;
+  abstract void closeDocStore(final SegmentWriteState state) throws IOException;
   abstract void abort();
   abstract boolean freeRAM();
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldConsumer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldConsumer.java?rev=707836&r1=707835&r2=707836&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldConsumer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldConsumer.java Sat Oct 25 03:40:00 2008
@@ -26,11 +26,11 @@
 
   /** Called when DocumentsWriter decides to create a new
    *  segment */
-  abstract void flush(Map threadsAndFields, DocumentsWriter.FlushState state) throws IOException;
+  abstract void flush(Map threadsAndFields, SegmentWriteState state) throws IOException;
 
   /** Called when DocumentsWriter decides to close the doc
    *  stores */
-  abstract void closeDocStore(DocumentsWriter.FlushState state) throws IOException;
+  abstract void closeDocStore(SegmentWriteState state) throws IOException;
   
   /** Called when an aborting exception is hit */
   abstract void abort();

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldConsumers.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldConsumers.java?rev=707836&r1=707835&r2=707836&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldConsumers.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldConsumers.java Sat Oct 25 03:40:00 2008
@@ -44,7 +44,7 @@
     two.setFieldInfos(fieldInfos);
   }
 
-  public void flush(Map threadsAndFields, DocumentsWriter.FlushState state) throws IOException {
+  public void flush(Map threadsAndFields, SegmentWriteState state) throws IOException {
 
     Map oneThreadsAndFields = new HashMap();
     Map twoThreadsAndFields = new HashMap();
@@ -76,7 +76,7 @@
     two.flush(twoThreadsAndFields, state);
   }
 
-  public void closeDocStore(DocumentsWriter.FlushState state) throws IOException {      
+  public void closeDocStore(SegmentWriteState state) throws IOException {      
     try {
       one.closeDocStore(state);
     } finally {

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessor.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessor.java?rev=707836&r1=707835&r2=707836&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessor.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessor.java Sat Oct 25 03:40:00 2008
@@ -43,11 +43,11 @@
     consumer.setFieldInfos(fieldInfos);
   }
 
-  public void closeDocStore(DocumentsWriter.FlushState state) throws IOException {
+  public void closeDocStore(SegmentWriteState state) throws IOException {
     consumer.closeDocStore(state);
   }
 
-  public void flush(Collection threads, DocumentsWriter.FlushState state) throws IOException {
+  public void flush(Collection threads, SegmentWriteState state) throws IOException {
 
     Map childThreadsAndFields = new HashMap();
     Iterator it = threads.iterator();
@@ -63,7 +63,9 @@
     // consumer can alter the FieldInfo* if necessary.  EG,
     // FreqProxTermsWriter does this with
     // FieldInfo.storePayload.
-    fieldInfos.write(state.directory, state.segmentName + ".fnm");
+    final String fileName = state.segmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
+    fieldInfos.write(state.directory, fileName);
+    state.flushedFiles.add(fileName);
   }
 
   public void abort() {

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java?rev=707836&r1=707835&r2=707836&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java Sat Oct 25 03:40:00 2008
@@ -87,7 +87,7 @@
   /** If there are fields we've seen but did not see again
    *  in the last run, then free them up. */
 
-  void trimFields(DocumentsWriter.FlushState state) {
+  void trimFields(SegmentWriteState state) {
 
     for(int i=0;i<fieldHash.length;i++) {
       DocFieldProcessorPerField perField = fieldHash[i];

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocInverter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DocInverter.java?rev=707836&r1=707835&r2=707836&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocInverter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocInverter.java Sat Oct 25 03:40:00 2008
@@ -44,7 +44,7 @@
     endConsumer.setFieldInfos(fieldInfos);
   }
 
-  void flush(Map threadsAndFields, DocumentsWriter.FlushState state) throws IOException {
+  void flush(Map threadsAndFields, SegmentWriteState state) throws IOException {
 
     Map childThreadsAndFields = new HashMap();
     Map endChildThreadsAndFields = new HashMap();
@@ -75,7 +75,7 @@
     endConsumer.flush(endChildThreadsAndFields, state);
   }
 
-  public void closeDocStore(DocumentsWriter.FlushState state) throws IOException {
+  public void closeDocStore(SegmentWriteState state) throws IOException {
     consumer.closeDocStore(state);
     endConsumer.closeDocStore(state);
   }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=707836&r1=707835&r2=707836&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java Sat Oct 25 03:40:00 2008
@@ -156,20 +156,6 @@
     }
   }
 
-  static class FlushState {
-    DocumentsWriter docWriter;
-    Directory directory;
-    String segmentName;
-    String docStoreSegmentName;
-    int numDocsInRAM;
-    int numDocsInStore;
-    Collection flushedFiles;
-
-    public String segmentFileName(String ext) {
-      return segmentName + "." + ext;
-    }
-  }
-
   /** Consumer returns this on each doc.  This holds any
    *  state that must be flushed synchronized "in docID
    *  order".  We gather these and flush them in order. */
@@ -402,7 +388,7 @@
 
   private Collection abortedFiles;               // List of files that were written before last abort()
 
-  private FlushState flushState;
+  private SegmentWriteState flushState;
 
   Collection abortedFiles() {
     return abortedFiles;
@@ -545,18 +531,7 @@
 
   synchronized private void initFlushState(boolean onlyDocStore) {
     initSegmentName(onlyDocStore);
-
-    if (flushState == null) {
-      flushState = new FlushState();
-      flushState.directory = directory;
-      flushState.docWriter = this;
-    }
-
-    flushState.docStoreSegmentName = docStoreSegment;
-    flushState.segmentName = segment;
-    flushState.numDocsInRAM = numDocsInRAM;
-    flushState.numDocsInStore = numDocsInStore;
-    flushState.flushedFiles = new HashSet();
+    flushState = new SegmentWriteState(this, directory, segment, docStoreSegment, numDocsInRAM, numDocsInStore, writer.getTermIndexInterval());
   }
 
   /** Flush all pending docs to a new segment */
@@ -602,7 +577,7 @@
         message(message);
       }
 
-      flushedDocCount += flushState.numDocsInRAM;
+      flushedDocCount += flushState.numDocs;
 
       doAfterFlush();
 
@@ -616,7 +591,7 @@
 
     assert waitQueue.waitingBytes == 0;
 
-    return flushState.numDocsInRAM;
+    return flushState.numDocs;
   }
 
   /** Build compound file for the segment we just flushed */

Added: lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsDocsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsDocsConsumer.java?rev=707836&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsDocsConsumer.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsDocsConsumer.java Sat Oct 25 03:40:00 2008
@@ -0,0 +1,34 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+/**
+ * NOTE: this API is experimental and will likely change
+ */
+
+abstract class FormatPostingsDocsConsumer {
+
+  /** Adds a new doc in this term.  If this returns null
+   *  then we just skip consuming positions/payloads. */
+  abstract FormatPostingsPositionsConsumer addDoc(int docID, int termDocFreq) throws IOException;
+
+  /** Called when we are done adding docs to this term */
+  abstract void finish() throws IOException;
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsDocsConsumer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsDocsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsDocsWriter.java?rev=707836&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsDocsWriter.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsDocsWriter.java Sat Oct 25 03:40:00 2008
@@ -0,0 +1,127 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Consumes doc & freq, writing them using the current
+ *  index file format */
+
+import java.io.IOException;
+
+import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.store.IndexOutput;
+
+final class FormatPostingsDocsWriter extends FormatPostingsDocsConsumer {
+
+  final IndexOutput out;
+  final FormatPostingsTermsWriter parent;
+  final FormatPostingsPositionsWriter posWriter;
+  final DefaultSkipListWriter skipListWriter;
+  final int skipInterval;
+  final int totalNumDocs;
+
+  boolean omitTF;
+  boolean storePayloads;
+  long freqStart;
+  FieldInfo fieldInfo;
+
+  FormatPostingsDocsWriter(SegmentWriteState state, FormatPostingsTermsWriter parent) throws IOException {
+    super();
+    this.parent = parent;
+    final String fileName = IndexFileNames.segmentFileName(parent.parent.segment, IndexFileNames.FREQ_EXTENSION);
+    state.flushedFiles.add(fileName);
+    out = parent.parent.dir.createOutput(fileName);
+    totalNumDocs = parent.parent.totalNumDocs;
+
+    // TODO: abstraction violation
+    skipInterval = parent.parent.termsOut.skipInterval;
+    skipListWriter = parent.parent.skipListWriter;
+    skipListWriter.setFreqOutput(out);
+
+    posWriter = new FormatPostingsPositionsWriter(state, this);
+  }
+
+  void setField(FieldInfo fieldInfo) {
+    this.fieldInfo = fieldInfo;
+    omitTF = fieldInfo.omitTf;
+    storePayloads = fieldInfo.storePayloads;
+    posWriter.setField(fieldInfo);
+  }
+
+  int lastDocID;
+  int df;
+
+  /** Adds a new doc in this term.  If this returns null
+   *  then we just skip consuming positions/payloads. */
+  FormatPostingsPositionsConsumer addDoc(int docID, int termDocFreq) throws IOException {
+
+    final int delta = docID - lastDocID;
+
+    if (docID < 0 || (df > 0 && delta <= 0))
+      throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " )");
+
+    if ((++df % skipInterval) == 0) {
+      // TODO: abstraction violation
+      skipListWriter.setSkipData(lastDocID, storePayloads, posWriter.lastPayloadLength);
+      skipListWriter.bufferSkip(df);
+    }
+
+    assert docID < totalNumDocs: "docID=" + docID + " totalNumDocs=" + totalNumDocs;
+
+    lastDocID = docID;
+    if (omitTF)
+      out.writeVInt(delta);
+    else if (1 == termDocFreq)
+      out.writeVInt((delta<<1) | 1);
+    else {
+      out.writeVInt(delta<<1);
+      out.writeVInt(termDocFreq);
+    }
+
+    return posWriter;
+  }
+
+  private final TermInfo termInfo = new TermInfo();  // minimize consing
+  final UnicodeUtil.UTF8Result utf8 = new UnicodeUtil.UTF8Result();
+
+  /** Called when we are done adding docs to this term */
+  void finish() throws IOException {
+    long skipPointer = skipListWriter.writeSkip(out);
+
+    // TODO: this is abstraction violation -- we should not
+    // peek up into parents terms encoding format
+    termInfo.set(df, parent.freqStart, parent.proxStart, (int) (skipPointer - parent.freqStart));
+
+    // TODO: we could do this incrementally
+    UnicodeUtil.UTF16toUTF8(parent.currentTerm, parent.currentTermStart, utf8);
+
+    if (df > 0) {
+      parent.termsOut.add(fieldInfo.number,
+                          utf8.result,
+                          utf8.length,
+                          termInfo);
+    }
+
+    lastDocID = 0;
+    df = 0;
+  }
+
+  void close() throws IOException {
+    out.close();
+    posWriter.close();
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsDocsWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsFieldsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsFieldsConsumer.java?rev=707836&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsFieldsConsumer.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsFieldsConsumer.java Sat Oct 25 03:40:00 2008
@@ -0,0 +1,36 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+/** Abstract API that consumes terms, doc, freq, prox and
+ *  payloads postings.  Concrete implementations of this
+ *  actually do "something" with the postings (write it into
+ *  the index in a specific format).
+ *
+ * NOTE: this API is experimental and will likely change
+ */
+abstract class FormatPostingsFieldsConsumer {
+
+  /** Add a new field */
+  abstract FormatPostingsTermsConsumer addField(FieldInfo field) throws IOException;
+
+  /** Called when we are done adding everything. */
+  abstract void finish() throws IOException;
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsFieldsConsumer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsFieldsWriter.java?rev=707836&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsFieldsWriter.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsFieldsWriter.java Sat Oct 25 03:40:00 2008
@@ -0,0 +1,73 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.Directory;
+
+final class FormatPostingsFieldsWriter extends FormatPostingsFieldsConsumer {
+
+  final Directory dir;
+  final String segment;
+  final TermInfosWriter termsOut;
+  final FieldInfos fieldInfos;
+  final FormatPostingsTermsWriter termsWriter;
+  final DefaultSkipListWriter skipListWriter;
+  final int totalNumDocs;
+
+  public FormatPostingsFieldsWriter(SegmentWriteState state, FieldInfos fieldInfos) throws IOException {
+    super();
+
+    dir = state.directory;
+    segment = state.segmentName;
+    totalNumDocs = state.numDocs;
+    this.fieldInfos = fieldInfos;
+    termsOut = new TermInfosWriter(dir,
+                                   segment,
+                                   fieldInfos,
+                                   state.termIndexInterval);
+
+    // TODO: this is a nasty abstraction violation (that we
+    // peek down to find freqOut/proxOut) -- we need a
+    // better abstraction here whereby these child consumers
+    // can provide skip data or not
+    skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval,
+                                               termsOut.maxSkipLevels,
+                                               totalNumDocs,
+                                               null,
+                                               null);
+
+    state.flushedFiles.add(state.segmentFileName(IndexFileNames.TERMS_EXTENSION));
+    state.flushedFiles.add(state.segmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION));
+
+    termsWriter = new FormatPostingsTermsWriter(state, this);
+  }
+
+  /** Add a new field */
+  FormatPostingsTermsConsumer addField(FieldInfo field) {
+    termsWriter.setField(field);
+    return termsWriter;
+  }
+
+  /** Called when we are done adding everything. */
+  void finish() throws IOException {
+    termsOut.close();
+    termsWriter.close();
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsFieldsWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsPositionsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsPositionsConsumer.java?rev=707836&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsPositionsConsumer.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsPositionsConsumer.java Sat Oct 25 03:40:00 2008
@@ -0,0 +1,32 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.IndexInput;
+
+abstract class FormatPostingsPositionsConsumer {
+
+  /** Add a new position & payload.  If payloadLength > 0
+   *  you must read those bytes from the IndexInput. */
+  abstract void addPosition(int position, byte[] payload, int payloadOffset, int payloadLength) throws IOException;
+
+  /** Called when we are done adding positions & payloads */
+  abstract void finish() throws IOException;
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsPositionsConsumer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsPositionsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsPositionsWriter.java?rev=707836&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsPositionsWriter.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsPositionsWriter.java Sat Oct 25 03:40:00 2008
@@ -0,0 +1,87 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.IndexInput;
+
+import java.io.IOException;
+
+final class FormatPostingsPositionsWriter extends FormatPostingsPositionsConsumer {
+
+  final FormatPostingsDocsWriter parent;
+  final IndexOutput out;
+
+  boolean omitTF;
+  boolean storePayloads;
+  int lastPayloadLength = -1;
+
+  FormatPostingsPositionsWriter(SegmentWriteState state, FormatPostingsDocsWriter parent) throws IOException {
+    this.parent = parent;
+    omitTF = parent.omitTF;
+    if (parent.parent.parent.fieldInfos.hasProx()) {
+      // At least one field does not omit TF, so create the
+      // prox file
+      final String fileName = IndexFileNames.segmentFileName(parent.parent.parent.segment, IndexFileNames.PROX_EXTENSION);
+      state.flushedFiles.add(fileName);
+      out = parent.parent.parent.dir.createOutput(fileName);
+      parent.skipListWriter.setProxOutput(out);
+    } else
+      // Every field omits TF so we will write no prox file
+      out = null;
+  }
+
+  int lastPosition;
+
+  /** Add a new position & payload */
+  void addPosition(int position, byte[] payload, int payloadOffset, int payloadLength) throws IOException {
+    assert !omitTF: "omitTF is true";
+    assert out != null;
+
+    final int delta = position - lastPosition;
+    lastPosition = position;
+
+    if (storePayloads) {
+      if (payloadLength != lastPayloadLength) {
+        lastPayloadLength = payloadLength;
+        out.writeVInt((delta<<1)|1);
+        out.writeVInt(payloadLength);
+      } else
+        out.writeVInt(delta << 1);
+      if (payloadLength > 0)
+        out.writeBytes(payload, payloadLength);
+    } else
+      out.writeVInt(delta);
+  }
+
+  void setField(FieldInfo fieldInfo) {
+    omitTF = fieldInfo.omitTf;
+    storePayloads = omitTF ? false : fieldInfo.storePayloads;
+  }
+
+  /** Called when we are done adding positions & payloads */
+  void finish() {       
+    lastPosition = 0;
+    lastPayloadLength = -1;
+  }
+
+  void close() throws IOException {
+    if (out != null)
+      out.close();
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsPositionsWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsTermsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsTermsConsumer.java?rev=707836&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsTermsConsumer.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsTermsConsumer.java Sat Oct 25 03:40:00 2008
@@ -0,0 +1,46 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.util.ArrayUtil;
+
+/**
+ * NOTE: this API is experimental and will likely change
+ */
+
+abstract class FormatPostingsTermsConsumer {
+
+  /** Adds a new term in this field; term ends with U+FFFF
+   *  char */
+  abstract FormatPostingsDocsConsumer addTerm(char[] text, int start) throws IOException;
+
+  char[] termBuffer;
+  FormatPostingsDocsConsumer addTerm(String text) throws IOException {
+    final int len = text.length();
+    if (termBuffer == null || termBuffer.length < 1+len)
+      termBuffer = new char[ArrayUtil.getNextSize(1+len)];
+    text.getChars(0, len, termBuffer, 0);
+    termBuffer[len] = 0xffff;
+    return addTerm(termBuffer, 0);
+  }
+
+  /** Called when we are done adding terms to this field */
+  abstract void finish() throws IOException;
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsTermsConsumer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsTermsWriter.java?rev=707836&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsTermsWriter.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsTermsWriter.java Sat Oct 25 03:40:00 2008
@@ -0,0 +1,71 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+final class FormatPostingsTermsWriter extends FormatPostingsTermsConsumer {
+
+  final FormatPostingsFieldsWriter parent;
+  final FormatPostingsDocsWriter docsWriter;
+  final TermInfosWriter termsOut;
+  FieldInfo fieldInfo;
+
+  FormatPostingsTermsWriter(SegmentWriteState state, FormatPostingsFieldsWriter parent) throws IOException {
+    super();
+    this.parent = parent;
+    termsOut = parent.termsOut;
+    docsWriter = new FormatPostingsDocsWriter(state, this);
+  }
+
+  void setField(FieldInfo fieldInfo) {
+    this.fieldInfo = fieldInfo;
+    docsWriter.setField(fieldInfo);
+  }
+
+  char[] currentTerm;
+  int currentTermStart;
+
+  long freqStart;
+  long proxStart;
+
+  /** Adds a new term in this field */
+  FormatPostingsDocsConsumer addTerm(char[] text, int start) {
+    currentTerm = text;
+    currentTermStart = start;
+
+    // TODO: this is abstraction violation -- ideally this
+    // terms writer is not so "invasive", looking for file
+    // pointers in its child consumers.
+    freqStart = docsWriter.out.getFilePointer();
+    if (docsWriter.posWriter.out != null)
+      proxStart = docsWriter.posWriter.out.getFilePointer();
+
+    parent.skipListWriter.resetSkip();
+
+    return docsWriter;
+  }
+
+  /** Called when we are done adding terms to this field */
+  void finish() {
+  }
+
+  void close() throws IOException {
+    docsWriter.close();
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/FormatPostingsTermsWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FreqProxTermsWriter.java?rev=707836&r1=707835&r2=707836&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FreqProxTermsWriter.java Sat Oct 25 03:40:00 2008
@@ -57,7 +57,7 @@
     }
   }
 
-  void closeDocStore(DocumentsWriter.FlushState state) {}
+  void closeDocStore(SegmentWriteState state) {}
   void abort() {}
 
 
@@ -66,7 +66,7 @@
   // under the same FieldInfo together, up into TermsHash*.
   // Other writers would presumably share alot of this...
 
-  public void flush(Map threadsAndFields, final DocumentsWriter.FlushState state) throws IOException {
+  public void flush(Map threadsAndFields, final SegmentWriteState state) throws IOException {
 
     // Gather all FieldData's that have postings, across all
     // ThreadStates
@@ -92,22 +92,19 @@
     Collections.sort(allFields);
     final int numAllFields = allFields.size();
 
-    final TermInfosWriter termsOut = new TermInfosWriter(state.directory,
-                                                         state.segmentName,
-                                                         fieldInfos,
-                                                         state.docWriter.writer.getTermIndexInterval());
-
-    final IndexOutput freqOut = state.directory.createOutput(state.segmentFileName(IndexFileNames.FREQ_EXTENSION));
-    final IndexOutput proxOut;
-
-    if (fieldInfos.hasProx())
-      proxOut = state.directory.createOutput(state.segmentFileName(IndexFileNames.PROX_EXTENSION));
-    else
-      proxOut = null;
-
-    final DefaultSkipListWriter skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval,
-                                                                           termsOut.maxSkipLevels,
-                                                                           state.numDocsInRAM, freqOut, proxOut);
+    // TODO: allow Lucene user to customize this consumer:
+    final FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);
+    /*
+    Current writer chain:
+      FormatPostingsFieldsConsumer
+        -> IMPL: FormatPostingsFieldsWriter
+          -> FormatPostingsTermsConsumer
+            -> IMPL: FormatPostingsTermsWriter
+              -> FormatPostingsDocConsumer
+                -> IMPL: FormatPostingsDocWriter
+                  -> FormatPostingsPositionsConsumer
+                    -> IMPL: FormatPostingsPositionsWriter
+    */
 
     int start = 0;
     while(start < numAllFields) {
@@ -129,7 +126,7 @@
 
       // If this field has postings then add them to the
       // segment
-      appendPostings(state, fields, termsOut, freqOut, proxOut, skipListWriter);
+      appendPostings(fields, consumer);
 
       for(int i=0;i<fields.length;i++) {
         TermsHashPerField perField = fields[i].termsHashPerField;
@@ -149,51 +146,18 @@
       perThread.termsHashPerThread.reset(true);
     }
 
-    freqOut.close();
-    if (proxOut != null) {
-      state.flushedFiles.add(state.segmentFileName(IndexFileNames.PROX_EXTENSION));
-      proxOut.close();
-    }
-    termsOut.close();
-    
-    // Record all files we have flushed
-    state.flushedFiles.add(state.segmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION));
-    state.flushedFiles.add(state.segmentFileName(IndexFileNames.FREQ_EXTENSION));
-    state.flushedFiles.add(state.segmentFileName(IndexFileNames.TERMS_EXTENSION));
-    state.flushedFiles.add(state.segmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION));
+    consumer.finish();
   }
 
-  final byte[] copyByteBuffer = new byte[4096];
-
-  /** Copy numBytes from srcIn to destIn */
-  void copyBytes(IndexInput srcIn, IndexOutput destIn, long numBytes) throws IOException {
-    // TODO: we could do this more efficiently (save a copy)
-    // because it's always from a ByteSliceReader ->
-    // IndexOutput
-    while(numBytes > 0) {
-      final int chunk;
-      if (numBytes > 4096)
-        chunk = 4096;
-      else
-        chunk = (int) numBytes;
-      srcIn.readBytes(copyByteBuffer, 0, chunk);
-      destIn.writeBytes(copyByteBuffer, 0, chunk);
-      numBytes -= chunk;
-    }
-  }
+  private byte[] payloadBuffer;
 
   /* Walk through all unique text tokens (Posting
    * instances) found in this field and serialize them
    * into a single RAM segment. */
-  void appendPostings(final DocumentsWriter.FlushState flushState,
-                      FreqProxTermsWriterPerField[] fields,
-                      TermInfosWriter termsOut,
-                      IndexOutput freqOut,
-                      IndexOutput proxOut,
-                      DefaultSkipListWriter skipListWriter)
+  void appendPostings(FreqProxTermsWriterPerField[] fields,
+                      FormatPostingsFieldsConsumer consumer)
     throws CorruptIndexException, IOException {
 
-    final int fieldNumber = fields[0].fieldInfo.number;
     int numFields = fields.length;
 
     final FreqProxFieldMergeState[] mergeStates = new FreqProxFieldMergeState[numFields];
@@ -208,15 +172,12 @@
       assert result;
     }
 
-    final int skipInterval = termsOut.skipInterval;
-    final boolean currentFieldOmitTf = fields[0].fieldInfo.omitTf;
+    final FormatPostingsTermsConsumer termsConsumer = consumer.addField(fields[0].fieldInfo);
 
-    // If current field omits tf then it cannot store
-    // payloads.  We silently drop the payloads in this case:
-    final boolean currentFieldStorePayloads = currentFieldOmitTf ? false : fields[0].fieldInfo.storePayloads;
-  
     FreqProxFieldMergeState[] termStates = new FreqProxFieldMergeState[numFields];
 
+    final boolean currentFieldOmitTf = fields[0].fieldInfo.omitTf;
+
     while(numFields > 0) {
 
       // Get the next term to merge
@@ -235,43 +196,21 @@
           termStates[numToMerge++] = mergeStates[i];
       }
 
-      int df = 0;
-      int lastPayloadLength = -1;
-
-      int lastDoc = 0;
-
-      final char[] text = termStates[0].text;
-      final int start = termStates[0].textOffset;
-
-      final long freqPointer = freqOut.getFilePointer();
-      final long proxPointer;
-      if (proxOut != null)
-        proxPointer = proxOut.getFilePointer();
-      else
-        proxPointer = 0;
-
-      skipListWriter.resetSkip();
+      final FormatPostingsDocsConsumer docConsumer = termsConsumer.addTerm(termStates[0].text, termStates[0].textOffset);
 
       // Now termStates has numToMerge FieldMergeStates
       // which all share the same term.  Now we must
       // interleave the docID streams.
       while(numToMerge > 0) {
         
-        if ((++df % skipInterval) == 0) {
-          skipListWriter.setSkipData(lastDoc, currentFieldStorePayloads, lastPayloadLength);
-          skipListWriter.bufferSkip(df);
-        }
-
         FreqProxFieldMergeState minState = termStates[0];
         for(int i=1;i<numToMerge;i++)
           if (termStates[i].docID < minState.docID)
             minState = termStates[i];
 
-        final int doc = minState.docID;
         final int termDocFreq = minState.termFreq;
 
-        assert doc < flushState.numDocsInRAM;
-        assert doc > lastDoc || df == 1;
+        final FormatPostingsPositionsConsumer posConsumer = docConsumer.addDoc(minState.docID, termDocFreq);
 
         final ByteSliceReader prox = minState.prox;
 
@@ -279,47 +218,32 @@
         // changing the format to match Lucene's segment
         // format.
         if (!currentFieldOmitTf) {
-          // omitTf == false so we do write positions & payload          
-          assert proxOut != null;
+          // omitTf == false so we do write positions &
+          // payload          
+          int position = 0;
           for(int j=0;j<termDocFreq;j++) {
             final int code = prox.readVInt();
-            if (currentFieldStorePayloads) {
-              final int payloadLength;
-              if ((code & 1) != 0) {
-                // This position has a payload
-                payloadLength = prox.readVInt();
-              } else
-                payloadLength = 0;
-              if (payloadLength != lastPayloadLength) {
-                proxOut.writeVInt(code|1);
-                proxOut.writeVInt(payloadLength);
-                lastPayloadLength = payloadLength;
-              } else
-                proxOut.writeVInt(code & (~1));
-              if (payloadLength > 0)
-                copyBytes(prox, proxOut, payloadLength);
-            } else {
-              assert 0 == (code & 1);
-              proxOut.writeVInt(code>>1);
-            }
+            position += code >> 1;
+
+            final int payloadLength;
+            if ((code & 1) != 0) {
+              // This position has a payload
+              payloadLength = prox.readVInt();
+
+              if (payloadBuffer == null || payloadBuffer.length < payloadLength)
+                payloadBuffer = new byte[payloadLength];
+
+              prox.readBytes(payloadBuffer, 0, payloadLength);
+
+            } else
+              payloadLength = 0;
+
+            posConsumer.addPosition(position, payloadBuffer, 0, payloadLength);
           } //End for
-          
-          final int newDocCode = (doc-lastDoc)<<1;
 
-          if (1 == termDocFreq) {
-            freqOut.writeVInt(newDocCode|1);
-           } else {
-            freqOut.writeVInt(newDocCode);
-            freqOut.writeVInt(termDocFreq);
-          }
-        } else {
-          // omitTf==true: we store only the docs, without
-          // term freq, positions, payloads
-          freqOut.writeVInt(doc-lastDoc);
+          posConsumer.finish();
         }
 
-        lastDoc = doc;
-
         if (!minState.nextDoc()) {
 
           // Remove from termStates
@@ -345,26 +269,10 @@
         }
       }
 
-      assert df > 0;
-
-      // Done merging this term
-
-      long skipPointer = skipListWriter.writeSkip(freqOut);
-
-      // Write term
-      termInfo.set(df, freqPointer, proxPointer, (int) (skipPointer - freqPointer));
-
-      // TODO: we could do this incrementally
-      UnicodeUtil.UTF16toUTF8(text, start, termsUTF8);
-
-      // TODO: we could save O(n) re-scan of the term by
-      // computing the shared prefix with the last term
-      // while during the UTF8 encoding
-      termsOut.add(fieldNumber,
-                   termsUTF8.result,
-                   termsUTF8.length,
-                   termInfo);
+      docConsumer.finish();
     }
+
+    termsConsumer.finish();
   }
 
   private final TermInfo termInfo = new TermInfo(); // minimize consing

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexFileNames.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/IndexFileNames.java?rev=707836&r1=707835&r2=707836&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexFileNames.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexFileNames.java Sat Oct 25 03:40:00 2008
@@ -195,4 +195,8 @@
         return true;
     return false;
   }
+
+  static String segmentFileName(String segmentName, String ext) {
+    return segmentName + "." + ext;
+  }
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/InvertedDocConsumer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/InvertedDocConsumer.java?rev=707836&r1=707835&r2=707836&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/InvertedDocConsumer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/InvertedDocConsumer.java Sat Oct 25 03:40:00 2008
@@ -29,10 +29,10 @@
   abstract void abort();
 
   /** Flush a new segment */
-  abstract void flush(Map threadsAndFields, DocumentsWriter.FlushState state) throws IOException;
+  abstract void flush(Map threadsAndFields, SegmentWriteState state) throws IOException;
 
   /** Close doc stores */
-  abstract void closeDocStore(DocumentsWriter.FlushState state) throws IOException;
+  abstract void closeDocStore(SegmentWriteState state) throws IOException;
 
   /** Attempt to free RAM, returning true if any RAM was
    *  freed */

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java?rev=707836&r1=707835&r2=707836&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java Sat Oct 25 03:40:00 2008
@@ -22,8 +22,8 @@
 
 abstract class InvertedDocEndConsumer {
   abstract InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread);
-  abstract void flush(Map threadsAndFields, DocumentsWriter.FlushState state) throws IOException;
-  abstract void closeDocStore(DocumentsWriter.FlushState state) throws IOException;
+  abstract void flush(Map threadsAndFields, SegmentWriteState state) throws IOException;
+  abstract void closeDocStore(SegmentWriteState state) throws IOException;
   abstract void abort();
   abstract void setFieldInfos(FieldInfos fieldInfos);
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/NormsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/NormsWriter.java?rev=707836&r1=707835&r2=707836&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/NormsWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/NormsWriter.java Sat Oct 25 03:40:00 2008
@@ -54,7 +54,7 @@
 
   /** Produce _X.nrm if any document had a field with norms
    *  not disabled */
-  public void flush(Map threadsAndFields, DocumentsWriter.FlushState state) throws IOException {
+  public void flush(Map threadsAndFields, SegmentWriteState state) throws IOException {
 
     final Map byField = new HashMap();
 
@@ -133,7 +133,7 @@
               }
             }
 
-            assert minDocID < state.numDocsInRAM;
+            assert minDocID < state.numDocs;
 
             // Fill hole
             for(;upto<minDocID;upto++)
@@ -154,16 +154,16 @@
           }
           
           // Fill final hole with defaultNorm
-          for(;upto<state.numDocsInRAM;upto++)
+          for(;upto<state.numDocs;upto++)
             normsOut.writeByte(defaultNorm);
         } else if (fieldInfo.isIndexed && !fieldInfo.omitNorms) {
           normCount++;
           // Fill entire field with default norm:
-          for(;upto<state.numDocsInRAM;upto++)
+          for(;upto<state.numDocs;upto++)
             normsOut.writeByte(defaultNorm);
         }
 
-        assert 4+normCount*state.numDocsInRAM == normsOut.getFilePointer() : ".nrm file size mismatch: expected=" + (4+normCount*state.numDocsInRAM) + " actual=" + normsOut.getFilePointer();
+        assert 4+normCount*state.numDocs == normsOut.getFilePointer() : ".nrm file size mismatch: expected=" + (4+normCount*state.numDocs) + " actual=" + normsOut.getFilePointer();
       }
 
     } finally {
@@ -171,5 +171,5 @@
     }
   }
 
-  void closeDocStore(DocumentsWriter.FlushState state) {}
+  void closeDocStore(SegmentWriteState state) {}
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java?rev=707836&r1=707835&r2=707836&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java Sat Oct 25 03:40:00 2008
@@ -21,6 +21,7 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Iterator;
+import java.util.HashSet;
 import java.util.List;
 
 import org.apache.lucene.document.Document;
@@ -476,38 +477,28 @@
       throw new RuntimeException("mergeVectors produced an invalid result: mergedDocs is " + mergedDocs + " but tvx size is " + tvxSize + "; now aborting this merge to prevent index corruption");
   }
 
-  private IndexOutput freqOutput = null;
-  private IndexOutput proxOutput = null;
-  private TermInfosWriter termInfosWriter = null;
-  private int skipInterval;
-  private int maxSkipLevels;
   private SegmentMergeQueue queue = null;
-  private DefaultSkipListWriter skipListWriter = null;
 
   private final void mergeTerms() throws CorruptIndexException, IOException {
+
+    SegmentWriteState state = new SegmentWriteState(null, directory, segment, null, mergedDocs, 0, termIndexInterval);
+
+    final FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);
+
     try {
-      freqOutput = directory.createOutput(segment + ".frq");
-      if (hasProx())
-        proxOutput = directory.createOutput(segment + ".prx");
-      termInfosWriter =
-              new TermInfosWriter(directory, segment, fieldInfos,
-                                  termIndexInterval);
-      skipInterval = termInfosWriter.skipInterval;
-      maxSkipLevels = termInfosWriter.maxSkipLevels;
-      skipListWriter = new DefaultSkipListWriter(skipInterval, maxSkipLevels, mergedDocs, freqOutput, proxOutput);
       queue = new SegmentMergeQueue(readers.size());
 
-      mergeTermInfos();
+      mergeTermInfos(consumer);
 
     } finally {
-      if (freqOutput != null) freqOutput.close();
-      if (proxOutput != null) proxOutput.close();
-      if (termInfosWriter != null) termInfosWriter.close();
+      consumer.finish();
       if (queue != null) queue.close();
     }
   }
 
-  private final void mergeTermInfos() throws CorruptIndexException, IOException {
+  boolean omitTF;
+
+  private final void mergeTermInfos(final FormatPostingsFieldsConsumer consumer) throws CorruptIndexException, IOException {
     int base = 0;
     final int readerCount = readers.size();
     for (int i = 0; i < readerCount; i++) {
@@ -533,6 +524,9 @@
 
     SegmentMergeInfo[] match = new SegmentMergeInfo[readers.size()];
 
+    String currentField = null;
+    FormatPostingsTermsConsumer termsConsumer = null;
+
     while (queue.size() > 0) {
       int matchSize = 0;			  // pop matching terms
       match[matchSize++] = (SegmentMergeInfo) queue.pop();
@@ -544,7 +538,16 @@
         top = (SegmentMergeInfo) queue.top();
       }
 
-      final int df = mergeTermInfo(match, matchSize);		  // add new TermInfo
+      if (currentField != term.field) {
+        currentField = term.field;
+        if (termsConsumer != null)
+          termsConsumer.finish();
+        final FieldInfo fieldInfo = fieldInfos.fieldInfo(currentField);
+        termsConsumer = consumer.addField(fieldInfo);
+        omitTF = fieldInfo.omitTf;
+      }
+
+      int df = appendPostings(termsConsumer, match, matchSize);		  // add new TermInfo
 
       if (checkAbort != null)
         checkAbort.work(df/3.0);
@@ -559,44 +562,6 @@
     }
   }
 
-  private final TermInfo termInfo = new TermInfo(); // minimize consing
-
-  /** Merge one term found in one or more segments. The array <code>smis</code>
-   *  contains segments that are positioned at the same term. <code>N</code>
-   *  is the number of cells in the array actually occupied.
-   *
-   * @param smis array of segments
-   * @param n number of cells in the array actually occupied
-   * @throws CorruptIndexException if the index is corrupt
-   * @throws IOException if there is a low-level IO error
-   */
-  private final int mergeTermInfo(SegmentMergeInfo[] smis, int n)
-          throws CorruptIndexException, IOException {
-    final long freqPointer = freqOutput.getFilePointer();
-    final long proxPointer;
-    if (proxOutput != null)
-      proxPointer = proxOutput.getFilePointer();
-    else
-      proxPointer = 0;
-
-    int df;
-    if (fieldInfos.fieldInfo(smis[0].term.field).omitTf) { // append posting data
-      df = appendPostingsNoTf(smis, n);     
-    } else{
-      df = appendPostings(smis, n);      
-    }
-    
-    long skipPointer = skipListWriter.writeSkip(freqOutput);
-
-    if (df > 0) {
-      // add an entry to the dictionary with pointers to prox and freq files
-      termInfo.set(df, freqPointer, proxPointer, (int) (skipPointer - freqPointer));
-      termInfosWriter.add(smis[0].term, termInfo);
-    }
-
-    return df;
-  }
-  
   private byte[] payloadBuffer;
   private int[][] docMaps;
   int[][] getDocMaps() {
@@ -617,13 +582,11 @@
    * @throws CorruptIndexException if the index is corrupt
    * @throws IOException if there is a low-level IO error
    */
-  private final int appendPostings(SegmentMergeInfo[] smis, int n)
-          throws CorruptIndexException, IOException {
-    int lastDoc = 0;
-    int df = 0;					  // number of docs w/ term
-    skipListWriter.resetSkip();
-    boolean storePayloads = fieldInfos.fieldInfo(smis[0].term.field).storePayloads;
-    int lastPayloadLength = -1;   // ensures that we write the first length
+  private final int appendPostings(final FormatPostingsTermsConsumer termsConsumer, SegmentMergeInfo[] smis, int n)
+        throws CorruptIndexException, IOException {
+
+    final FormatPostingsDocsConsumer docConsumer = termsConsumer.addTerm(smis[0].term.text);
+    int df = 0;
     for (int i = 0; i < n; i++) {
       SegmentMergeInfo smi = smis[i];
       TermPositions postings = smi.getPositions();
@@ -631,114 +594,37 @@
       int base = smi.base;
       int[] docMap = smi.getDocMap();
       postings.seek(smi.termEnum);
+
       while (postings.next()) {
+        df++;
         int doc = postings.doc();
         if (docMap != null)
           doc = docMap[doc];                      // map around deletions
         doc += base;                              // convert to merged space
 
-        if (doc < 0 || (df > 0 && doc <= lastDoc))
-          throw new CorruptIndexException("docs out of order (" + doc +
-              " <= " + lastDoc + " )");
+        final int freq = postings.freq();
+        final FormatPostingsPositionsConsumer posConsumer = docConsumer.addDoc(doc, freq);
 
-        df++;
-
-        if ((df % skipInterval) == 0) {
-          skipListWriter.setSkipData(lastDoc, storePayloads, lastPayloadLength);
-          skipListWriter.bufferSkip(df);
-        }
-
-        int docCode = (doc - lastDoc) << 1;	  // use low bit to flag freq=1
-        lastDoc = doc;
-
-        int freq = postings.freq();
-        if (freq == 1) {
-          freqOutput.writeVInt(docCode | 1);	  // write doc & freq=1
-        } else {
-          freqOutput.writeVInt(docCode);	  // write doc
-          freqOutput.writeVInt(freq);		  // write frequency in doc
-        }
-        
-        /** See {@link DocumentWriter#writePostings(Posting[], String)} for 
-         *  documentation about the encoding of positions and payloads
-         */
-        int lastPosition = 0;			  // write position deltas
-        for (int j = 0; j < freq; j++) {
-          int position = postings.nextPosition();
-          int delta = position - lastPosition;
-          if (storePayloads) {
-            int payloadLength = postings.getPayloadLength();
-            if (payloadLength == lastPayloadLength) {
-              proxOutput.writeVInt(delta * 2);
-            } else {
-              proxOutput.writeVInt(delta * 2 + 1);
-              proxOutput.writeVInt(payloadLength);
-              lastPayloadLength = payloadLength;
-            }
+        if (!omitTF) {
+          for (int j = 0; j < freq; j++) {
+            final int position = postings.nextPosition();
+            final int payloadLength = postings.getPayloadLength();
             if (payloadLength > 0) {
-              if (payloadBuffer == null || payloadBuffer.length < payloadLength) {
+              if (payloadBuffer == null || payloadBuffer.length < payloadLength)
                 payloadBuffer = new byte[payloadLength];
-              }
               postings.getPayload(payloadBuffer, 0);
-              proxOutput.writeBytes(payloadBuffer, 0, payloadLength);
             }
-          } else {
-            proxOutput.writeVInt(delta);
+            posConsumer.addPosition(position, payloadBuffer, 0, payloadLength);
           }
-          lastPosition = position;
+          posConsumer.finish();
         }
       }
     }
-    return df;
-  }
+    docConsumer.finish();
 
-  /** Process postings from multiple segments without tf, all positioned on the
-   *  same term. Writes out merged entries only into freqOutput, proxOut is not written.
-   *
-   * @param smis array of segments
-   * @param n number of cells in the array actually occupied
-   * @return number of documents across all segments where this term was found
-   * @throws CorruptIndexException if the index is corrupt
-   * @throws IOException if there is a low-level IO error
-   */
-  private final int appendPostingsNoTf(SegmentMergeInfo[] smis, int n)
-          throws CorruptIndexException, IOException {
-    int lastDoc = 0;
-    int df = 0;           // number of docs w/ term
-    skipListWriter.resetSkip();
-    int lastPayloadLength = -1;   // ensures that we write the first length
-    for (int i = 0; i < n; i++) {
-      SegmentMergeInfo smi = smis[i];
-      TermPositions postings = smi.getPositions();
-      assert postings != null;
-      int base = smi.base;
-      int[] docMap = smi.getDocMap();
-      postings.seek(smi.termEnum);
-      while (postings.next()) {
-        int doc = postings.doc();
-        if (docMap != null)
-          doc = docMap[doc];                      // map around deletions
-        doc += base;                              // convert to merged space
-
-        if (doc < 0 || (df > 0 && doc <= lastDoc))
-          throw new CorruptIndexException("docs out of order (" + doc +
-              " <= " + lastDoc + " )");
-
-        df++;
-
-        if ((df % skipInterval) == 0) {
-          skipListWriter.setSkipData(lastDoc, false, lastPayloadLength);
-          skipListWriter.bufferSkip(df);
-        }
-
-        int docCode = (doc - lastDoc);   
-        lastDoc = doc;
-        freqOutput.writeVInt(docCode);    // write doc & freq=1
-      }
-    }
     return df;
   }
-  
+
   private void mergeNorms() throws IOException {
     byte[] normBuffer = null;
     IndexOutput output = null;

Added: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentWriteState.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentWriteState.java?rev=707836&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentWriteState.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentWriteState.java Sat Oct 25 03:40:00 2008
@@ -0,0 +1,50 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.HashSet;
+import java.util.Collection;
+
+import org.apache.lucene.store.Directory;
+
+class SegmentWriteState {
+  DocumentsWriter docWriter;
+  Directory directory;
+  String segmentName;
+  String docStoreSegmentName;
+  int numDocs;
+  int termIndexInterval;
+  int numDocsInStore;
+  Collection flushedFiles;
+
+  public SegmentWriteState(DocumentsWriter docWriter, Directory directory, String segmentName, String docStoreSegmentName, int numDocs,
+                           int numDocsInStore, int termIndexInterval) {
+    this.docWriter = docWriter;
+    this.directory = directory;
+    this.segmentName = segmentName;
+    this.docStoreSegmentName = docStoreSegmentName;
+    this.numDocs = numDocs;
+    this.numDocsInStore = numDocsInStore;
+    this.termIndexInterval = termIndexInterval;
+    flushedFiles = new HashSet();
+  }
+
+  public String segmentFileName(String ext) {
+    return segmentName + "." + ext;
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentWriteState.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriter.java?rev=707836&r1=707835&r2=707836&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriter.java Sat Oct 25 03:40:00 2008
@@ -40,7 +40,7 @@
     return new StoredFieldsWriterPerThread(docFieldProcessorPerThread, this);
   }
 
-  synchronized public void flush(Map threadsAndFields, DocumentsWriter.FlushState state) throws IOException {
+  synchronized public void flush(Map threadsAndFields, SegmentWriteState state) throws IOException {
 
     if (state.numDocsInStore > 0) {
       // It's possible that all documents seen in this segment
@@ -72,7 +72,7 @@
     }
   }
 
-  synchronized public void closeDocStore(DocumentsWriter.FlushState state) throws IOException {
+  synchronized public void closeDocStore(SegmentWriteState state) throws IOException {
     final int inc = state.numDocsInStore - lastDocID;
     if (inc > 0) {
       initFieldsWriter();

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java?rev=707836&r1=707835&r2=707836&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java Sat Oct 25 03:40:00 2008
@@ -51,7 +51,7 @@
       postings[i] = new PostingList();
   }
 
-  synchronized void flush(Map threadsAndFields, final DocumentsWriter.FlushState state) throws IOException {
+  synchronized void flush(Map threadsAndFields, final SegmentWriteState state) throws IOException {
 
     if (tvx != null) {
 
@@ -80,7 +80,7 @@
     }
   }
 
-  synchronized void closeDocStore(final DocumentsWriter.FlushState state) throws IOException {
+  synchronized void closeDocStore(final SegmentWriteState state) throws IOException {
     if (tvx != null) {
       // At least one doc in this run had term vectors
       // enabled

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/TermsHash.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermsHash.java?rev=707836&r1=707835&r2=707836&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermsHash.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermsHash.java Sat Oct 25 03:40:00 2008
@@ -85,7 +85,7 @@
       nextTermsHash.abort();
   }
 
-  void shrinkFreePostings(Map threadsAndFields, DocumentsWriter.FlushState state) {
+  void shrinkFreePostings(Map threadsAndFields, SegmentWriteState state) {
 
     assert postingsFreeCount == postingsAllocCount: Thread.currentThread().getName() + ": postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer;
 
@@ -97,13 +97,13 @@
     }
   }
 
-  synchronized void closeDocStore(DocumentsWriter.FlushState state) throws IOException {
+  synchronized void closeDocStore(SegmentWriteState state) throws IOException {
     consumer.closeDocStore(state);
     if (nextTermsHash != null)
       nextTermsHash.closeDocStore(state);
   }
 
-  synchronized void flush(Map threadsAndFields, final DocumentsWriter.FlushState state) throws IOException {
+  synchronized void flush(Map threadsAndFields, final SegmentWriteState state) throws IOException {
     Map childThreadsAndFields = new HashMap();
     Map nextThreadsAndFields;
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumer.java?rev=707836&r1=707835&r2=707836&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumer.java Sat Oct 25 03:40:00 2008
@@ -24,9 +24,9 @@
   abstract int bytesPerPosting();
   abstract void createPostings(RawPostingList[] postings, int start, int count);
   abstract TermsHashConsumerPerThread addThread(TermsHashPerThread perThread);
-  abstract void flush(Map threadsAndFields, final DocumentsWriter.FlushState state) throws IOException;
+  abstract void flush(Map threadsAndFields, final SegmentWriteState state) throws IOException;
   abstract void abort();
-  abstract void closeDocStore(DocumentsWriter.FlushState state) throws IOException;
+  abstract void closeDocStore(SegmentWriteState state) throws IOException;
 
   FieldInfos fieldInfos;
 



Mime
View raw message