lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r677865 [4/5] - in /lucene/java/trunk: ./ src/java/org/apache/lucene/index/ src/java/org/apache/lucene/store/ src/java/org/apache/lucene/util/ src/test/org/apache/lucene/ src/test/org/apache/lucene/index/ src/test/org/apache/lucene/search/
Date Fri, 18 Jul 2008 09:20:14 GMT
Added: lucene/java/trunk/src/java/org/apache/lucene/index/NormsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/NormsWriter.java?rev=677865&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/NormsWriter.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/NormsWriter.java Fri Jul 18 02:20:12 2008
@@ -0,0 +1,175 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.search.Similarity;
+
+// TODO FI: norms could actually be stored as doc store
+
+/** Writes norms.  Each thread X field accumulates the norms
+ *  for the doc/fields it saw, then the flush method below
+ *  merges all of these together into a single _X.nrm file.
+ */
+
+final class NormsWriter extends InvertedDocEndConsumer {
+
+  // Byte written for any doc that recorded no norm for a field:
+  // the encoded form of a 1.0f norm.
+  private static final byte defaultNorm = Similarity.encodeNorm(1.0f);
+  private FieldInfos fieldInfos;
+  /** Creates the per-thread consumer that accumulates norms for one indexing thread. */
+  public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) {
+    return new NormsWriterPerThread(docInverterPerThread, this);
+  }
+
+  // Nothing to discard here: all buffered norms live in the per-thread
+  // per-field instances, which are aborted separately.
+  public void abort() {}
+
+  // We only write the _X.nrm file at flush
+  void files(Collection files) {}
+
+  void setFieldInfos(FieldInfos fieldInfos) {
+    this.fieldInfos = fieldInfos;
+  }
+
+  /** Produce _X.nrm if any document had a field with norms
+   *  not disabled */
+  public void flush(Map threadsAndFields, DocumentsWriter.FlushState state) throws IOException {
+
+    // Maps FieldInfo -> List of NormsWriterPerField (one entry per thread
+    // that saw the field since the last flush).
+    final Map byField = new HashMap();
+
+    // Typically, each thread will have encountered the same
+    // field.  So first we collate by field, ie, all
+    // per-thread field instances that correspond to the
+    // same FieldInfo
+    final Iterator it = threadsAndFields.entrySet().iterator();
+    while(it.hasNext()) {
+      Map.Entry entry = (Map.Entry) it.next();
+
+      Collection fields = (Collection) entry.getValue();
+      Iterator fieldsIt = fields.iterator();
+
+      while(fieldsIt.hasNext()) {
+        NormsWriterPerField perField = (NormsWriterPerField) fieldsIt.next();
+
+        if (perField.upto > 0) {
+          // It has some norms
+          List l = (List) byField.get(perField.fieldInfo);
+          if (l == null) {
+            l = new ArrayList();
+            byField.put(perField.fieldInfo, l);
+          }
+          l.add(perField);
+        } else
+          // Remove this field since we haven't seen it
+          // since the previous flush
+          fieldsIt.remove();
+      }
+    }
+
+    final String normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;
+    state.flushedFiles.add(normsFileName);
+    IndexOutput normsOut = state.directory.createOutput(normsFileName);
+
+    try {
+      normsOut.writeBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.length);
+
+      final int numField = fieldInfos.size();
+
+      // Number of fields whose norms have been written so far; used by the
+      // file-size assertion after each field.
+      int normCount = 0;
+
+      // Iterate fields in fieldNumber order so each field's norms land at a
+      // deterministic position in the file.
+      for(int fieldNumber=0;fieldNumber<numField;fieldNumber++) {
+
+        final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
+
+        List toMerge = (List) byField.get(fieldInfo);
+        // Next docID to be written for this field; docs in [0, upto) are done.
+        int upto = 0;
+        if (toMerge != null) {
+
+          final int numFields = toMerge.size();
+
+          normCount++;
+
+          final NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
+          int[] uptos = new int[numFields];
+
+          for(int j=0;j<numFields;j++)
+            fields[j] = (NormsWriterPerField) toMerge.get(j);
+
+          // k-way merge of the per-thread (docID, norm) pairs in increasing
+          // docID order; numLeft counts threads not yet exhausted.
+          int numLeft = numFields;
+              
+          while(numLeft > 0) {
+
+            assert uptos[0] < fields[0].docIDs.length : " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.length);
+
+            // Linear scan for the pending entry with the smallest docID.
+            int minLoc = 0;
+            int minDocID = fields[0].docIDs[uptos[0]];
+
+            for(int j=1;j<numLeft;j++) {
+              final int docID = fields[j].docIDs[uptos[j]];
+              if (docID < minDocID) {
+                minDocID = docID;
+                minLoc = j;
+              }
+            }
+
+            assert minDocID < state.numDocsInRAM;
+
+            // Fill hole
+            for(;upto<minDocID;upto++)
+              normsOut.writeByte(defaultNorm);
+
+            normsOut.writeByte(fields[minLoc].norms[uptos[minLoc]]);
+            (uptos[minLoc])++;
+            upto++;
+
+            // This thread's entries are exhausted: recycle its buffers and
+            // compact by moving the last live thread into its slot.
+            if (uptos[minLoc] == fields[minLoc].upto) {
+              fields[minLoc].reset();
+              if (minLoc != numLeft-1) {
+                fields[minLoc] = fields[numLeft-1];
+                uptos[minLoc] = uptos[numLeft-1];
+              }
+              numLeft--;
+            }
+          }
+          
+          // Fill final hole with defaultNorm
+          for(;upto<state.numDocsInRAM;upto++)
+            normsOut.writeByte(defaultNorm);
+        } else if (fieldInfo.isIndexed && !fieldInfo.omitNorms) {
+          normCount++;
+          // Fill entire field with default norm:
+          for(;upto<state.numDocsInRAM;upto++)
+            normsOut.writeByte(defaultNorm);
+        }
+
+        // File must hold the header (presumably 4 bytes — see NORMS_HEADER)
+        // plus exactly one byte per doc per field written so far.
+        assert 4+normCount*state.numDocsInRAM == normsOut.getFilePointer() : ".nrm file size mismatch: expected=" + (4+normCount*state.numDocsInRAM) + " actual=" + normsOut.getFilePointer();
+      }
+
+    } finally {
+      normsOut.close();
+    }
+  }
+
+  // Norms are not part of the doc store, so there is nothing to close here.
+  void closeDocStore(DocumentsWriter.FlushState state) {}
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/NormsWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/index/NormsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/NormsWriterPerField.java?rev=677865&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/NormsWriterPerField.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/NormsWriterPerField.java Fri Jul 18 02:20:12 2008
@@ -0,0 +1,77 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.search.Similarity;
+
+/** Taps into DocInverter, as an InvertedDocEndConsumer,
+ *  which is called at the end of inverting each field.  We
+ *  just look at the length for the field (docState.length)
+ *  and record the norm. */
+
+final class NormsWriterPerField extends InvertedDocEndConsumerPerField implements Comparable {
+
+  final NormsWriterPerThread perThread;
+  final FieldInfo fieldInfo;
+  final DocumentsWriter.DocState docState;
+
+  // Holds all docID/norm pairs we've seen
+  int[] docIDs = new int[1];
+  byte[] norms = new byte[1];
+  // Number of valid entries in docIDs/norms.
+  int upto;
+
+  final DocInverter.FieldInvertState fieldState;
+
+  /** Clears the recorded pairs; called after this field's norms are flushed. */
+  public void reset() {
+    // Shrink back if we are overallocated now:
+    docIDs = ArrayUtil.shrink(docIDs, upto);
+    norms = ArrayUtil.shrink(norms, upto);
+    upto = 0;
+  }
+
+  public NormsWriterPerField(final DocInverterPerField docInverterPerField, final NormsWriterPerThread perThread, final FieldInfo fieldInfo) {
+    this.perThread = perThread;
+    this.fieldInfo = fieldInfo;
+    docState = perThread.docState;
+    fieldState = docInverterPerField.fieldState;
+  }
+
+  // Discard buffered norms; unlike reset(), the (possibly oversized)
+  // arrays are kept for reuse.
+  void abort() {
+    upto = 0;
+  }
+
+  /** Orders per-field instances alphabetically by field name. */
+  public int compareTo(Object other) {
+    return fieldInfo.name.compareTo(((NormsWriterPerField) other).fieldInfo.name);
+  }
+  
+  /** Called once per doc X field after inversion: computes the norm from
+   *  the field's boost and length and appends the (docID, norm) pair. */
+  void finish() {
+    assert docIDs.length == norms.length;
+    if (fieldInfo.isIndexed && !fieldInfo.omitNorms) {
+      if (docIDs.length <= upto) {
+        assert docIDs.length == upto;
+        docIDs = ArrayUtil.grow(docIDs, 1+upto);
+        norms = ArrayUtil.grow(norms, 1+upto);
+      }
+      final float norm = fieldState.boost * docState.similarity.lengthNorm(fieldInfo.name, fieldState.length);
+      norms[upto] = Similarity.encodeNorm(norm);
+      docIDs[upto] = docState.docID;
+      upto++;
+    }
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/NormsWriterPerField.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/index/NormsWriterPerThread.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/NormsWriterPerThread.java?rev=677865&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/NormsWriterPerThread.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/NormsWriterPerThread.java Fri Jul 18 02:20:12 2008
@@ -0,0 +1,41 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Per-thread glue for NormsWriter: simply hands out NormsWriterPerField
+// instances; all buffered state lives in those per-field instances.
+final class NormsWriterPerThread extends InvertedDocEndConsumerPerThread {
+  final NormsWriter normsWriter;
+  final DocumentsWriter.DocState docState;
+
+  public NormsWriterPerThread(DocInverterPerThread docInverterPerThread, NormsWriter normsWriter) {
+    this.normsWriter = normsWriter;
+    docState = docInverterPerThread.docState;
+  }
+
+  /** Creates the per-field norms accumulator for this thread. */
+  InvertedDocEndConsumerPerField addField(DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) {
+    return new NormsWriterPerField(docInverterPerField, this, fieldInfo);
+  }
+
+  // No per-thread state to discard or to set up per document:
+  void abort() {}
+
+  void startDocument() {}
+  void finishDocument() {}
+
+  // This consumer never volunteers to free RAM on demand.
+  boolean freeRAM() {
+    return false;
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/NormsWriterPerThread.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/index/RawPostingList.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/RawPostingList.java?rev=677865&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/RawPostingList.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/RawPostingList.java Fri Jul 18 02:20:12 2008
@@ -0,0 +1,36 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/** This is the base class for an in-memory posting list,
+ *  keyed by a Token.  {@link TermsHash} maintains a hash
+ *  table holding one instance of this per unique Token.
+ *  Consumers of TermsHash ({@link TermsHashConsumer}) must
+ *  subclass this class with its own concrete class.
+ *  FreqProxTermsWriter.PostingList is the
+ *  subclass used for the freq/prox postings, and {@link
+ *  TermVectorsTermsWriter.PostingList} is the subclass
+ *  used to hold TermVectors postings. */
+
+abstract class RawPostingList {
+  // Estimated RAM per instance: object header plus the three int fields.
+  final static int BYTES_SIZE = DocumentsWriter.OBJECT_HEADER_BYTES + 3*DocumentsWriter.INT_NUM_BYTE;
+  // NOTE(review): these appear to be start offsets into shared text/int/byte
+  // pools managed by TermsHash — confirm against TermsHashPerField.
+  int textStart;
+  int intStart;
+  int byteStart;
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/RawPostingList.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriter.java?rev=677865&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriter.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriter.java Fri Jul 18 02:20:12 2008
@@ -0,0 +1,190 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Map;
+import java.io.IOException;
+import org.apache.lucene.store.RAMOutputStream;
+import org.apache.lucene.util.ArrayUtil;
+
+/** This is a DocFieldConsumer that writes stored fields. */
+final class StoredFieldsWriter extends DocFieldConsumer {
+
+  FieldsWriter fieldsWriter;
+  final DocumentsWriter docWriter;
+  // docID (including the doc-store offset) just past the last doc whose
+  // stored fields have been written or skipped; see fill().
+  int lastDocID;
+
+  // Free list of recycled PerDoc instances, to avoid per-document allocation.
+  PerDoc[] docFreeList = new PerDoc[1];
+  int freeCount;
+
+  public StoredFieldsWriter(DocumentsWriter docWriter) {
+    this.docWriter = docWriter;
+  }
+
+  public DocFieldConsumerPerThread addThread(DocFieldProcessorPerThread docFieldProcessorPerThread) throws IOException {
+    return new StoredFieldsWriterPerThread(docFieldProcessorPerThread, this);
+  }
+
+  /** Flushes pending stored fields, first padding the index so that every
+   *  doc in the store has an entry. */
+  synchronized public void flush(Map threadsAndFields, DocumentsWriter.FlushState state) throws IOException {
+
+    if (state.numDocsInStore > 0) {
+      // It's possible that all documents seen in this segment
+      // hit non-aborting exceptions, in which case we will
+      // not have yet init'd the FieldsWriter:
+      initFieldsWriter();
+
+      // Fill fdx file to include any final docs that we
+      // skipped because they hit non-aborting exceptions
+      fill(state.numDocsInStore - docWriter.getDocStoreOffset());
+    }
+
+    if (fieldsWriter != null)
+      fieldsWriter.flush();
+  }
+  
+  // Lazily opens the stored-fields files once a doc-store segment name is
+  // available; no-op until then (or once already open).
+  private void initFieldsWriter() throws IOException {
+    if (fieldsWriter == null) {
+      final String docStoreSegment = docWriter.getDocStoreSegment();
+      if (docStoreSegment != null) {
+        // NOTE(review): this assert is redundant — the enclosing if just
+        // tested the same condition; harmless but could be removed.
+        assert docStoreSegment != null;
+        fieldsWriter = new FieldsWriter(docWriter.directory,
+                                        docStoreSegment,
+                                        fieldInfos);
+        docWriter.addOpenFile(docStoreSegment + "." + IndexFileNames.FIELDS_EXTENSION);
+        docWriter.addOpenFile(docStoreSegment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);
+        lastDocID = 0;
+      }
+    }
+  }
+
+  /** Closes the fields/index files, records them as flushed, and verifies
+   *  the index file length matches the number of stored docs. */
+  synchronized public void closeDocStore(DocumentsWriter.FlushState state) throws IOException {
+    final int inc = state.numDocsInStore - lastDocID;
+    if (inc > 0) {
+      initFieldsWriter();
+      fill(state.numDocsInStore - docWriter.getDocStoreOffset());
+    }
+
+    if (fieldsWriter != null) {
+      fieldsWriter.close();
+      fieldsWriter = null;
+      lastDocID = 0;
+      assert state.docStoreSegmentName != null;
+      state.flushedFiles.add(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION);
+      state.flushedFiles.add(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);
+
+      state.docWriter.removeOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION);
+      state.docWriter.removeOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);
+
+      // Expected size: 4-byte header plus 8 bytes per stored doc.
+      if (4+state.numDocsInStore*8 != state.directory.fileLength(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION))
+        throw new RuntimeException("after flush: fdx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.fileLength(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION) + " length in bytes of " + state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);
+    }
+  }
+
+  // Total PerDoc instances ever created; the free list is grown to at
+  // least this size so every outstanding instance can be recycled.
+  int allocCount;
+
+  /** Returns a pooled PerDoc, allocating a new one only when the free
+   *  list is empty. */
+  synchronized PerDoc getPerDoc() {
+    if (freeCount == 0) {
+      allocCount++;
+      if (allocCount > docFreeList.length) {
+        // Grow our free list up front to make sure we have
+        // enough space to recycle all outstanding PerDoc
+        // instances
+        assert allocCount == 1+docFreeList.length;
+        docFreeList = new PerDoc[ArrayUtil.getNextSize(allocCount)];
+      }
+      return new PerDoc();
+    } else
+      return docFreeList[--freeCount];
+  }
+
+  /** Discards all state.  Close failures are intentionally swallowed
+   *  since we are aborting anyway. */
+  synchronized void abort() {
+    if (fieldsWriter != null) {
+      try {
+        fieldsWriter.close();
+      } catch (Throwable t) {
+      }
+      fieldsWriter = null;
+      lastDocID = 0;
+    }
+  }
+
+  /** Fills in any hole in the docIDs */
+  void fill(int docID) throws IOException {
+    final int docStoreOffset = docWriter.getDocStoreOffset();
+
+    // We must "catch up" for all docs before us
+    // that had no stored fields:
+    final int end = docID+docStoreOffset;
+    while(lastDocID < end) {
+      fieldsWriter.skipDocument();
+      lastDocID++;
+    }
+  }
+
+  /** Appends one buffered doc's stored fields to the real files (in docID
+   *  order) and recycles its PerDoc. */
+  synchronized void finishDocument(PerDoc perDoc) throws IOException {
+    assert docWriter.writer.testPoint("StoredFieldsWriter.finishDocument start");
+    initFieldsWriter();
+
+    fill(perDoc.docID);
+
+    // Append stored fields to the real FieldsWriter:
+    fieldsWriter.flushDocument(perDoc.numStoredFields, perDoc.fdt);
+    lastDocID++;
+    perDoc.reset();
+    free(perDoc);
+    assert docWriter.writer.testPoint("StoredFieldsWriter.finishDocument end");
+  }
+
+  // This consumer never volunteers to free RAM on demand.
+  public boolean freeRAM() {
+    return false;
+  }
+
+  // Returns a fully reset PerDoc to the free list for reuse.
+  synchronized void free(PerDoc perDoc) {
+    assert freeCount < docFreeList.length;
+    assert 0 == perDoc.numStoredFields;
+    assert 0 == perDoc.fdt.length();
+    assert 0 == perDoc.fdt.getFilePointer();
+    docFreeList[freeCount++] = perDoc;
+  }
+
+  // Per-document buffer: a doc's stored fields are first written into the
+  // in-RAM stream fdt, then appended to the real files by finishDocument.
+  class PerDoc extends DocumentsWriter.DocWriter {
+
+    // TODO: use something more memory efficient; for small
+    // docs the 1024 buffer size of RAMOutputStream wastes alot
+    RAMOutputStream fdt = new RAMOutputStream();
+    int numStoredFields;
+
+    void reset() {
+      fdt.reset();
+      numStoredFields = 0;
+    }
+
+    void abort() {
+      reset();
+      free(this);
+    }
+
+    public long sizeInBytes() {
+      return fdt.sizeInBytes();
+    }
+
+    public void finish() throws IOException {
+      finishDocument(this);
+    }
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriterPerField.java?rev=677865&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriterPerField.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriterPerField.java Fri Jul 18 02:20:12 2008
@@ -0,0 +1,66 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.document.Fieldable;
+
+// Buffers one doc's stored occurrences of a single field into the
+// per-thread PerDoc RAM stream via the thread's local FieldsWriter.
+final class StoredFieldsWriterPerField extends DocFieldConsumerPerField {
+
+  final StoredFieldsWriterPerThread perThread;
+  final FieldInfo fieldInfo;
+  final DocumentsWriter.DocState docState;
+
+  public StoredFieldsWriterPerField(StoredFieldsWriterPerThread perThread, FieldInfo fieldInfo) {
+    this.perThread = perThread;
+    this.fieldInfo = fieldInfo;
+    docState = perThread.docState;
+  }
+
+  // Process all occurrences of a single field in one doc;
+  // count is 1 if a given field occurs only once in the
+  // Document, which is the "typical" case
+  public void processFields(Fieldable[] fields, int count) throws IOException {
+
+    // Lazily allocate the doc's PerDoc buffer on the first field processed
+    // for this document, and point the local FieldsWriter at it.
+    final StoredFieldsWriter.PerDoc doc;
+    if (perThread.doc == null) {
+      doc = perThread.doc = perThread.storedFieldsWriter.getPerDoc();
+      doc.docID = docState.docID;
+      perThread.localFieldsWriter.setFieldsStream(doc.fdt);
+      assert doc.numStoredFields == 0: "doc.numStoredFields=" + doc.numStoredFields;
+      assert 0 == doc.fdt.length();
+      assert 0 == doc.fdt.getFilePointer();
+    } else {
+      doc = perThread.doc;
+      assert doc.docID == docState.docID: "doc.docID=" + doc.docID + " docState.docID=" + docState.docID;
+    }
+
+    // Buffer each stored occurrence; non-stored fields are ignored here.
+    for(int i=0;i<count;i++) {
+      final Fieldable field = fields[i];
+      if (field.isStored()) {
+        perThread.localFieldsWriter.writeField(fieldInfo, field);
+        assert docState.testPoint("StoredFieldsWriterPerField.processFields.writeField");
+        doc.numStoredFields++;
+      }
+    }
+  }
+
+  // No per-field buffered state to discard.
+  void abort() {
+  }
+}
+

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriterPerField.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java?rev=677865&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java Fri Jul 18 02:20:12 2008
@@ -0,0 +1,67 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.store.IndexOutput;
+
+// Per-thread state for StoredFieldsWriter: holds the doc currently being
+// buffered and a FieldsWriter that writes into that doc's RAM stream.
+final class StoredFieldsWriterPerThread extends DocFieldConsumerPerThread {
+
+  // Constructed with null outputs: it writes only into the per-doc RAM
+  // stream (doc.fdt), never directly to the directory.
+  final FieldsWriter localFieldsWriter;
+  final StoredFieldsWriter storedFieldsWriter;
+  final DocumentsWriter.DocState docState;
+
+  // Buffer for the doc currently being processed; null if the doc has had
+  // no stored fields so far.
+  StoredFieldsWriter.PerDoc doc;
+
+  public StoredFieldsWriterPerThread(DocFieldProcessorPerThread docFieldProcessorPerThread, StoredFieldsWriter storedFieldsWriter) throws IOException {
+    this.storedFieldsWriter = storedFieldsWriter;
+    this.docState = docFieldProcessorPerThread.docState;
+    localFieldsWriter = new FieldsWriter((IndexOutput) null, (IndexOutput) null, storedFieldsWriter.fieldInfos);
+  }
+
+  public void startDocument() {
+    if (doc != null) {
+      // Only happens if previous document hit non-aborting
+      // exception while writing stored fields into
+      // localFieldsWriter:
+      doc.reset();
+      doc.docID = docState.docID;
+    }
+  }
+
+  /** Hands the buffered doc (possibly null) to the caller and clears the
+   *  per-thread reference in the same step. */
+  public DocumentsWriter.DocWriter finishDocument() {
+    // If there were any stored fields in this doc, doc will
+    // be non-null; else it's null.
+    try {
+      return doc;
+    } finally {
+      doc = null;
+    }
+  }
+
+  public void abort() {
+    if (doc != null) {
+      doc.abort();
+      doc = null;
+    }
+  }
+
+  public DocFieldConsumerPerField addField(FieldInfo fieldInfo) {
+    return new StoredFieldsWriterPerField(this, fieldInfo);
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java?rev=677865&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java Fri Jul 18 02:20:12 2008
@@ -0,0 +1,291 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMOutputStream;
+import org.apache.lucene.util.ArrayUtil;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.Map;
+
+/**
+ * Consumes each document's term-vector postings and writes them to
+ * the doc-store term vector files: the index (.tvx), documents
+ * (.tvd) and fields (.tvf) outputs.  Each doc's vector data is first
+ * buffered in RAM (in a {@link PerDoc}) and then appended, in docID
+ * order, to the real outputs by {@link #finishDocument}.
+ */
+final class TermVectorsTermsWriter extends TermsHashConsumer {
+
+  final DocumentsWriter docWriter;
+  TermVectorsWriter termVectorsWriter;
+  // Free list of recycled PerDoc instances (guarded by synchronized methods)
+  PerDoc[] docFreeList = new PerDoc[1];
+  int freeCount;
+  IndexOutput tvx;   // term vectors index file; null until initTermVectorsWriter runs
+  IndexOutput tvd;   // term vectors documents file
+  IndexOutput tvf;   // term vectors fields file
+  int lastDocID;     // next docID (doc-store absolute) to be written to tvx
+
+  public TermVectorsTermsWriter(DocumentsWriter docWriter) {
+    this.docWriter = docWriter;
+    // Two byte streams per posting: stream 0 carries positions,
+    // stream 1 carries offsets (see TermVectorsTermsWriterPerField)
+    streamCount = 2;
+  }
+
+  public TermsHashConsumerPerThread addThread(TermsHashPerThread termsHashPerThread) {
+    return new TermVectorsTermsWriterPerThread(termsHashPerThread, this);
+  }
+
+  /** Allocates count fresh PostingList entries into postings[start..start+count). */
+  void createPostings(RawPostingList[] postings, int start, int count) {
+    final int end = start + count;
+    for(int i=start;i<end;i++)
+      postings[i] = new PostingList();
+  }
+
+  /** Flushes any buffered vector output to the doc-store files and
+   *  resets all per-thread / per-field hash state. */
+  synchronized void flush(Map threadsAndFields, final DocumentsWriter.FlushState state) throws IOException {
+
+    if (tvx != null) {
+
+      if (state.numDocsInStore > 0)
+        // In case there are some final documents that we
+        // didn't see (because they hit a non-aborting exception):
+        fill(state.numDocsInStore - docWriter.getDocStoreOffset());
+
+      tvx.flush();
+      tvd.flush();
+      tvf.flush();
+    }
+
+    // Reset every per-field hash and every per-thread state so the
+    // next segment starts clean
+    Iterator it = threadsAndFields.entrySet().iterator();
+    while(it.hasNext()) {
+      Map.Entry entry = (Map.Entry) it.next();
+      Iterator it2 = ((Collection) entry.getValue()).iterator();
+      while(it2.hasNext()) {
+        TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) it2.next();
+        perField.termsHashPerField.reset();
+        perField.shrinkHash();
+      }
+
+      TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.getKey();
+      perThread.termsHashPerThread.reset(true);
+    }
+  }
+
+  /** Closes the three doc-store vector files, sanity-checking the
+   *  .tvx length against the doc count, and records the flushed
+   *  file names with the flush state / DocumentsWriter. */
+  synchronized void closeDocStore(final DocumentsWriter.FlushState state) throws IOException {
+    if (tvx != null) {
+      // At least one doc in this run had term vectors
+      // enabled
+      fill(state.numDocsInStore - docWriter.getDocStoreOffset());
+      tvx.close();
+      tvf.close();
+      tvd.close();
+      tvx = null;
+      assert state.docStoreSegmentName != null;
+      // Expected .tvx size: 4-byte format header + 16 bytes (two
+      // longs: tvd pointer, tvf pointer) per document:
+      if (4+state.numDocsInStore*16 != state.directory.fileLength(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION))
+        throw new RuntimeException("after flush: tvx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.fileLength(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION) + " length in bytes of " + state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
+
+      state.flushedFiles.add(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
+      state.flushedFiles.add(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
+      state.flushedFiles.add(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
+
+      docWriter.removeOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
+      docWriter.removeOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
+      docWriter.removeOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
+
+      lastDocID = 0;
+    }    
+  }
+
+  // Total PerDoc instances ever allocated; used to size docFreeList
+  // so every outstanding PerDoc can be recycled
+  int allocCount;
+
+  /** Returns a recycled PerDoc, or allocates a new one (growing the
+   *  free list up front so free() can always accept it back). */
+  synchronized PerDoc getPerDoc() {
+    if (freeCount == 0) {
+      allocCount++;
+      if (allocCount > docFreeList.length) {
+        // Grow our free list up front to make sure we have
+        // enough space to recycle all outstanding PerDoc
+        // instances
+        assert allocCount == 1+docFreeList.length;
+        docFreeList = new PerDoc[ArrayUtil.getNextSize(allocCount)];
+      }
+      return new PerDoc();
+    } else
+      return docFreeList[--freeCount];
+  }
+
+  /** Fills in no-term-vectors for all docs we haven't seen
+   *  since the last doc that had term vectors.  Each skipped doc
+   *  gets a tvx entry pointing at a zero-field tvd record. */
+  void fill(int docID) throws IOException {
+    final int docStoreOffset = docWriter.getDocStoreOffset();
+    final int end = docID+docStoreOffset;
+    if (lastDocID < end) {
+      final long tvfPosition = tvf.getFilePointer();
+      while(lastDocID < end) {
+        tvx.writeLong(tvd.getFilePointer());
+        tvd.writeVInt(0);
+        tvx.writeLong(tvfPosition);
+        lastDocID++;
+      }
+    }
+  }
+
+  /** Lazily creates the tvx/tvd/tvf outputs for the current doc
+   *  store segment and writes their format headers. */
+  synchronized void initTermVectorsWriter() throws IOException {        
+    if (tvx == null) {
+      
+      final String docStoreSegment = docWriter.getDocStoreSegment();
+
+      if (docStoreSegment == null)
+        return;
+
+      // (redundant after the null check above; kept as documentation)
+      assert docStoreSegment != null;
+
+      // If we hit an exception while init'ing the term
+      // vector output files, we must abort this segment
+      // because those files will be in an unknown
+      // state:
+      tvx = docWriter.directory.createOutput(docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
+      tvd = docWriter.directory.createOutput(docStoreSegment +  "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
+      tvf = docWriter.directory.createOutput(docStoreSegment +  "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
+      
+      tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
+      tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
+      tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
+
+      docWriter.addOpenFile(docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
+      docWriter.addOpenFile(docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
+      docWriter.addOpenFile(docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
+
+      lastDocID = 0;
+    }
+  }
+
+  /** Appends one doc's RAM-buffered vectors to the real outputs.
+   *  Called (in docID order) when the indexing chain finishes a
+   *  PerDoc; recycles the PerDoc afterwards. */
+  synchronized void finishDocument(PerDoc perDoc) throws IOException {
+
+    assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument start");
+
+    initTermVectorsWriter();
+
+    // Catch up on any preceding docs that had no vectors:
+    fill(perDoc.docID);
+
+    // Append term vectors to the real outputs:
+    tvx.writeLong(tvd.getFilePointer());
+    tvx.writeLong(tvf.getFilePointer());
+    tvd.writeVInt(perDoc.numVectorFields);
+    if (perDoc.numVectorFields > 0) {
+      for(int i=0;i<perDoc.numVectorFields;i++)
+        tvd.writeVInt(perDoc.fieldNumbers[i]);
+      assert 0 == perDoc.fieldPointers[0];
+      // Field pointers into tvf are delta-coded:
+      long lastPos = perDoc.fieldPointers[0];
+      for(int i=1;i<perDoc.numVectorFields;i++) {
+        long pos = perDoc.fieldPointers[i];
+        tvd.writeVLong(pos-lastPos);
+        lastPos = pos;
+      }
+      perDoc.tvf.writeTo(tvf);
+      perDoc.tvf.reset();
+      perDoc.numVectorFields = 0;
+    }
+
+    assert lastDocID == perDoc.docID + docWriter.getDocStoreOffset();
+
+    lastDocID++;
+
+    free(perDoc);
+    assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument end");
+  }
+
+  public boolean freeRAM() {
+    // We don't hold any state beyond one doc, so we don't
+    // free persistent RAM here
+    return false;
+  }
+
+  /** Closes (best-effort, swallowing errors) and nulls the three
+   *  outputs after an aborting exception. */
+  public void abort() {
+    if (tvx != null) {
+      try {
+        tvx.close();
+      } catch (Throwable t) {
+      }
+      tvx = null;
+    }
+    if (tvd != null) {
+      try {
+        tvd.close();
+      } catch (Throwable t) {
+      }
+      tvd = null;
+    }
+    if (tvf != null) {
+      try {
+        tvf.close();
+      } catch (Throwable t) {
+      }
+      tvf = null;
+    }
+    lastDocID = 0;
+  }
+
+  /** Returns a PerDoc to the free list for reuse. */
+  synchronized void free(PerDoc doc) {
+    assert freeCount < docFreeList.length;
+    docFreeList[freeCount++] = doc;
+  }
+
+  /** RAM buffer holding one document's term vector data (the tvf
+   *  bytes plus, per field, its number and tvf start pointer) until
+   *  finishDocument appends it to the real files. */
+  class PerDoc extends DocumentsWriter.DocWriter {
+
+    // TODO: use something more memory efficient; for small
+    // docs the 1024 buffer size of RAMOutputStream wastes a lot
+    RAMOutputStream tvf = new RAMOutputStream();
+    int numVectorFields;
+
+    // Parallel arrays: field number and its start offset in tvf
+    int[] fieldNumbers = new int[1];
+    long[] fieldPointers = new long[1];
+
+    void reset() {
+      tvf.reset();
+      numVectorFields = 0;
+    }
+
+    void abort() {
+      reset();
+      free(this);
+    }
+
+    /** Records that fieldNumber's vector data starts at the current
+     *  tvf position, growing the parallel arrays as needed. */
+    void addField(final int fieldNumber) {
+      if (numVectorFields == fieldNumbers.length) {
+        fieldNumbers = ArrayUtil.grow(fieldNumbers);
+        fieldPointers = ArrayUtil.grow(fieldPointers);
+      }
+      fieldNumbers[numVectorFields] = fieldNumber;
+      fieldPointers[numVectorFields] = tvf.getFilePointer();
+      numVectorFields++;
+    }
+
+    public long sizeInBytes() {
+      return tvf.sizeInBytes();
+    }
+
+    public void finish() throws IOException {
+      finishDocument(this);
+    }
+  }
+
+  static final class PostingList extends RawPostingList {
+    int freq;                                       // How many times this term occurred in the current doc
+    int lastOffset;                                 // Last offset we saw
+    int lastPosition;                               // Last position where this term occurred
+  }
+
+  /** RAM per posting: base RawPostingList size plus our 3 ints
+   *  (freq, lastOffset, lastPosition). */
+  int bytesPerPosting() {
+    return RawPostingList.BYTES_SIZE + 3 * DocumentsWriter.INT_NUM_BYTE;
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java?rev=677865&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java Fri Jul 18 02:20:12 2008
@@ -0,0 +1,235 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.store.IndexOutput;
+
+/**
+ * Per-field term vector writer: decides (from the Fieldable flags)
+ * whether vectors/positions/offsets are enabled for this field, and
+ * in {@link #finish} serializes the field's postings — with
+ * prefix-compressed UTF-8 terms — into the per-doc tvf RAM buffer.
+ */
+final class TermVectorsTermsWriterPerField extends TermsHashConsumerPerField {
+
+  final TermVectorsTermsWriterPerThread perThread;
+  final TermsHashPerField termsHashPerField;
+  final TermVectorsTermsWriter termsWriter;
+  final FieldInfo fieldInfo;
+  final DocumentsWriter.DocState docState;
+  final DocInverter.FieldInvertState fieldState;
+
+  // Per-doc flags, recomputed by start() from the field instances:
+  boolean doVectors;
+  boolean doVectorPositions;
+  boolean doVectorOffsets;
+
+  // High-water mark of postings seen in one doc; used by shrinkHash
+  int maxNumPostings;
+
+  public TermVectorsTermsWriterPerField(TermsHashPerField termsHashPerField, TermVectorsTermsWriterPerThread perThread, FieldInfo fieldInfo) {
+    this.termsHashPerField = termsHashPerField;
+    this.perThread = perThread;
+    this.termsWriter = perThread.termsWriter;
+    this.fieldInfo = fieldInfo;
+    docState = termsHashPerField.docState;
+    fieldState = termsHashPerField.fieldState;
+  }
+
+  /** Inspects all instances of this field in the doc and returns
+   *  true if term vectors should be written; also lazily obtains
+   *  the per-thread PerDoc buffer. */
+  boolean start(Fieldable[] fields, int count) {
+    doVectors = false;
+    doVectorPositions = false;
+    doVectorOffsets = false;
+
+    // Any one instance enabling vectors/positions/offsets enables
+    // them for the whole field in this doc:
+    for(int i=0;i<count;i++) {
+      Fieldable field = fields[i];
+      if (field.isIndexed() && field.isTermVectorStored()) {
+        doVectors = true;
+        doVectorPositions |= field.isStorePositionWithTermVector();
+        doVectorOffsets |= field.isStoreOffsetWithTermVector();
+      }
+    }
+
+    if (doVectors) {
+      if (perThread.doc == null) {
+        perThread.doc = termsWriter.getPerDoc();
+        perThread.doc.docID = docState.docID;
+        assert perThread.doc.numVectorFields == 0;
+        assert 0 == perThread.doc.tvf.length();
+        assert 0 == perThread.doc.tvf.getFilePointer();
+      } else {
+        assert perThread.doc.docID == docState.docID;
+
+        if (termsHashPerField.numPostings != 0)
+          // Only necessary if previous doc hit a
+          // non-aborting exception while writing vectors in
+          // this field:
+          termsHashPerField.reset();
+      }
+    }
+
+    // TODO: only if needed for performance
+    //perThread.postingsCount = 0;
+
+    return doVectors;
+  }     
+
+  public void abort() {}
+
+  /** Called once per field per document if term vectors
+   *  are enabled, to write the vectors to
+   *  RAMOutputStream, which is then quickly flushed to
+   *  the real term vectors files in the Directory. */
+  void finish() throws IOException {
+
+    assert docState.testPoint("TermVectorsTermsWriterPerField.finish start");
+
+    final int numPostings = termsHashPerField.numPostings;
+
+    assert numPostings >= 0;
+
+    if (!doVectors || numPostings == 0)
+      return;
+
+    if (numPostings > maxNumPostings)
+      maxNumPostings = numPostings;
+
+    final IndexOutput tvf = perThread.doc.tvf;
+
+    // This is called once, after inverting all occurrences
+    // of a given field in the doc.  At this point we flush
+    // our hash into the DocWriter.
+
+    assert fieldInfo.storeTermVector;
+    assert perThread.vectorFieldsInOrder(fieldInfo);
+
+    perThread.doc.addField(termsHashPerField.fieldInfo.number);
+
+    final RawPostingList[] postings = termsHashPerField.sortPostings();
+
+    tvf.writeVInt(numPostings);
+    byte bits = 0x0;
+    if (doVectorPositions)
+      bits |= TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
+    if (doVectorOffsets) 
+      bits |= TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
+    tvf.writeByte(bits);
+
+    int encoderUpto = 0;
+    int lastTermBytesCount = 0;
+
+    final ByteSliceReader reader = perThread.vectorSliceReader;
+    final char[][] charBuffers = perThread.termsHashPerThread.charPool.buffers;
+    for(int j=0;j<numPostings;j++) {
+      final TermVectorsTermsWriter.PostingList posting = (TermVectorsTermsWriter.PostingList) postings[j];
+      final int freq = posting.freq;
+          
+      // Locate the term text in the shared char block pool:
+      final char[] text2 = charBuffers[posting.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
+      final int start2 = posting.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
+
+      // We swap between two encoders to save copying
+      // last Term's byte array
+      final UnicodeUtil.UTF8Result utf8Result = perThread.utf8Results[encoderUpto];
+
+      // TODO: we could do this incrementally
+      UnicodeUtil.UTF16toUTF8(text2, start2, utf8Result);
+      final int termBytesCount = utf8Result.length;
+
+      // TODO: UTF16toUTF8 could tell us this prefix
+      // Compute common prefix between last term and
+      // this term
+      int prefix = 0;
+      if (j > 0) {
+        final byte[] lastTermBytes = perThread.utf8Results[1-encoderUpto].result;
+        final byte[] termBytes = perThread.utf8Results[encoderUpto].result;
+        while(prefix < lastTermBytesCount && prefix < termBytesCount) {
+          if (lastTermBytes[prefix] != termBytes[prefix])
+            break;
+          prefix++;
+        }
+      }
+      encoderUpto = 1-encoderUpto;
+      lastTermBytesCount = termBytesCount;
+
+      // Terms are written prefix-compressed: shared-prefix length,
+      // suffix length, then the suffix bytes
+      final int suffix = termBytesCount - prefix;
+      tvf.writeVInt(prefix);
+      tvf.writeVInt(suffix);
+      tvf.writeBytes(utf8Result.result, prefix, suffix);
+      tvf.writeVInt(freq);
+
+      // Stream 0 holds positions, stream 1 holds offsets:
+      if (doVectorPositions) {
+        termsHashPerField.initReader(reader, posting, 0);
+        reader.writeTo(tvf);
+      }
+
+      if (doVectorOffsets) {
+        termsHashPerField.initReader(reader, posting, 1);
+        reader.writeTo(tvf);
+      }
+    }
+
+    termsHashPerField.reset();
+    perThread.termsHashPerThread.reset(false);
+  }
+
+  /** Shrinks the term hash back down after flush, using the largest
+   *  posting count seen since the last shrink as the sizing hint. */
+  void shrinkHash() {
+    termsHashPerField.shrinkHash(maxNumPostings);
+    maxNumPostings = 0;
+  }
+
+  /** First occurrence of a term in this doc: init freq to 1 and
+   *  write absolute offset/position into streams 1/0. */
+  void newTerm(Token t, RawPostingList p0) {
+
+    assert docState.testPoint("TermVectorsTermsWriterPerField.newTerm start");
+
+    TermVectorsTermsWriter.PostingList p = (TermVectorsTermsWriter.PostingList) p0;
+
+    p.freq = 1;
+
+    if (doVectorOffsets) {
+      final int startOffset = fieldState.offset + t.startOffset();
+      final int endOffset = fieldState.offset + t.endOffset();
+      termsHashPerField.writeVInt(1, startOffset);
+      termsHashPerField.writeVInt(1, endOffset - startOffset);
+      p.lastOffset = endOffset;
+    }
+
+    if (doVectorPositions) {
+      termsHashPerField.writeVInt(0, fieldState.position);
+      p.lastPosition = fieldState.position;
+    }
+  }
+
+  /** Subsequent occurrence of a term: bump freq and write
+   *  delta-coded offset/position relative to the last occurrence. */
+  void addTerm(Token t, RawPostingList p0) {
+
+    assert docState.testPoint("TermVectorsTermsWriterPerField.addTerm start");
+
+    TermVectorsTermsWriter.PostingList p = (TermVectorsTermsWriter.PostingList) p0;
+    p.freq++;
+
+    if (doVectorOffsets) {
+      final int startOffset = fieldState.offset + t.startOffset();
+      final int endOffset = fieldState.offset + t.endOffset();
+      termsHashPerField.writeVInt(1, startOffset - p.lastOffset);
+      termsHashPerField.writeVInt(1, endOffset - startOffset);
+      p.lastOffset = endOffset;
+    }
+
+    if (doVectorPositions) {
+      termsHashPerField.writeVInt(0, fieldState.position - p.lastPosition);
+      p.lastPosition = fieldState.position;
+    }
+  }
+
+  void skippingLongTerm(Token t) {}
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerThread.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerThread.java?rev=677865&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerThread.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerThread.java Fri Jul 18 02:20:12 2008
@@ -0,0 +1,87 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.UnicodeUtil;
+
+/**
+ * Per-thread state for {@link TermVectorsTermsWriter}: holds the
+ * current doc's {@link TermVectorsTermsWriter.PerDoc} buffer, the
+ * shared UTF-8 encoders / slice reader used when serializing
+ * vectors, and an assert-only check that fields arrive in name order.
+ */
+final class TermVectorsTermsWriterPerThread extends TermsHashConsumerPerThread {
+
+  final TermVectorsTermsWriter termsWriter;
+  final TermsHashPerThread termsHashPerThread;
+  final DocumentsWriter.DocState docState;
+
+  // Current doc's RAM-buffered vectors; null between documents
+  TermVectorsTermsWriter.PerDoc doc;
+
+  public TermVectorsTermsWriterPerThread(TermsHashPerThread termsHashPerThread, TermVectorsTermsWriter termsWriter) {
+    this.termsWriter = termsWriter;
+    this.termsHashPerThread = termsHashPerThread;
+    docState = termsHashPerThread.docState;
+  }
+  
+  // Used by perField when serializing the term vectors
+  final ByteSliceReader vectorSliceReader = new ByteSliceReader();
+
+  // Two encoders so the previous term's UTF-8 bytes stay available
+  // for prefix computation without copying (see perField.finish)
+  final UnicodeUtil.UTF8Result utf8Results[] = {new UnicodeUtil.UTF8Result(),
+                                                new UnicodeUtil.UTF8Result()};
+
+  /** Resets leftover per-doc state (only non-null if the previous
+   *  doc hit a non-aborting exception) before a new doc begins. */
+  public void startDocument() {
+    assert clearLastVectorFieldName();
+    if (doc != null) {
+      doc.reset();
+      doc.docID = docState.docID;
+    }
+  }
+
+  /** Returns this doc's buffered vectors (may be null) and clears
+   *  our reference so the next doc starts fresh. */
+  public DocumentsWriter.DocWriter finishDocument() {
+    try {
+      return doc;
+    } finally {
+      doc = null;
+    }
+  }
+
+  public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo) {
+    return new TermVectorsTermsWriterPerField(termsHashPerField, this, fieldInfo);
+  }
+
+  public void abort() {
+    if (doc != null) {
+      doc.abort();
+      doc = null;
+    }
+  }
+
+  // Called only by assert
+  final boolean clearLastVectorFieldName() {
+    lastVectorFieldName = null;
+    return true;
+  }
+
+  // Called only by assert
+  String lastVectorFieldName;
+  /** Assert-only: verifies fields are written in increasing
+   *  field-name order within a document. */
+  final boolean vectorFieldsInOrder(FieldInfo fi) {
+    try {
+      if (lastVectorFieldName != null)
+        return lastVectorFieldName.compareTo(fi.name) < 0;
+      else
+        return true;
+    } finally {
+      lastVectorFieldName = fi.name;
+    }
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerThread.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/index/TermsHash.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermsHash.java?rev=677865&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermsHash.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermsHash.java Fri Jul 18 02:20:12 2008
@@ -0,0 +1,244 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Collection;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.HashSet;
+import java.util.Arrays;
+import java.io.IOException;
+
+import org.apache.lucene.util.ArrayUtil;
+
+/** This class implements {@link InvertedDocConsumer}, which
+ *  is passed each token produced by the analyzer on each
+ *  field.  It stores these tokens in a hash table, and
+ *  allocates separate byte streams per token.  Consumers of
+ *  this class, eg {@link FreqProxTermsWriter} and {@link
+ *  TermVectorsTermsWriter}, write their own byte streams
+ *  under each term.
+ */
+
+final class TermsHash extends InvertedDocConsumer {
+
+  final TermsHashConsumer consumer;
+  // Optional secondary TermsHash (e.g. term vectors chained after
+  // the freq/prox writer); may be null
+  final TermsHash nextTermsHash;
+  // Estimated RAM cost per posting, including pointer overhead
+  final int bytesPerPosting;
+  // Max number of postings released per freeRAM() call
+  final int postingsFreeChunk;
+  final int streamCount;
+  final DocumentsWriter docWriter;
+  
+  TermsHash primaryTermsHash;
+
+  // Free list of recycled postings; guarded by synchronized methods
+  RawPostingList[] postingsFreeList = new RawPostingList[1];
+  int postingsFreeCount;
+  int postingsAllocCount;
+  // Only the primary TermsHash reports RAM usage to DocumentsWriter
+  boolean trackAllocations;
+
+  public TermsHash(final DocumentsWriter docWriter, boolean trackAllocations, final TermsHashConsumer consumer, final TermsHash nextTermsHash) {
+    this.docWriter = docWriter;
+    this.consumer = consumer;
+    this.streamCount = consumer.streamCount;
+    this.nextTermsHash = nextTermsHash;
+    this.trackAllocations = trackAllocations;
+
+    // Why + 4*POINTER_NUM_BYTE below?
+    //   +1: Posting is referenced by postingsFreeList array
+    //   +3: Posting is referenced by hash, which
+    //       targets 25-50% fill factor; approximate this
+    //       as 3X # pointers
+    bytesPerPosting = consumer.bytesPerPosting() + 4*DocumentsWriter.POINTER_NUM_BYTE;
+    postingsFreeChunk = (int) (DocumentsWriter.BYTE_BLOCK_SIZE / bytesPerPosting);
+  }
+
+  InvertedDocConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) {
+    return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, null);
+  }
+
+  TermsHashPerThread addThread(DocInverterPerThread docInverterPerThread, TermsHashPerThread primaryPerThread) {
+    return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, primaryPerThread);
+  }
+
+  void setFieldInfos(FieldInfos fieldInfos) {
+    this.fieldInfos = fieldInfos;
+    consumer.setFieldInfos(fieldInfos);
+  }
+
+  /** Aborts our consumer and the chained TermsHash, if any. */
+  synchronized public void abort() {
+    consumer.abort();
+    if (nextTermsHash != null)
+      nextTermsHash.abort();
+  }
+
+  /** Shrinks the postings free list after flush, when all postings
+   *  have been recycled.  (threadsAndFields and state are unused
+   *  here; the signature parallels flush.) */
+  void shrinkFreePostings(Map threadsAndFields, DocumentsWriter.FlushState state) {
+
+    assert postingsFreeCount == postingsAllocCount: Thread.currentThread().getName() + ": postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer;
+
+    final int newSize = ArrayUtil.getShrinkSize(postingsFreeList.length, postingsAllocCount);
+    if (newSize != postingsFreeList.length) {
+      RawPostingList[] newArray = new RawPostingList[newSize];
+      System.arraycopy(postingsFreeList, 0, newArray, 0, postingsFreeCount);
+      postingsFreeList = newArray;
+    }
+  }
+
+  synchronized void closeDocStore(DocumentsWriter.FlushState state) throws IOException {
+    consumer.closeDocStore(state);
+    if (nextTermsHash != null)
+      nextTermsHash.closeDocStore(state);
+  }
+
+  /** Translates the (perThread -> fields) map into our consumer's
+   *  per-consumer view, flushes the consumer, shrinks the free
+   *  list, then recursively flushes the chained TermsHash. */
+  synchronized void flush(Map threadsAndFields, final DocumentsWriter.FlushState state) throws IOException {
+    Map childThreadsAndFields = new HashMap();
+    Map nextThreadsAndFields;
+
+    if (nextTermsHash != null)
+      nextThreadsAndFields = new HashMap();
+    else
+      nextThreadsAndFields = null;
+
+    Iterator it = threadsAndFields.entrySet().iterator();
+    while(it.hasNext()) {
+
+      Map.Entry entry = (Map.Entry) it.next();
+
+      TermsHashPerThread perThread = (TermsHashPerThread) entry.getKey();
+
+      Collection fields = (Collection) entry.getValue();
+
+      Iterator fieldsIt = fields.iterator();
+      Collection childFields = new HashSet();
+      Collection nextChildFields;
+
+      if (nextTermsHash != null)
+        nextChildFields = new HashSet();
+      else
+        nextChildFields = null;
+
+      while(fieldsIt.hasNext()) {
+        TermsHashPerField perField = (TermsHashPerField) fieldsIt.next();
+        childFields.add(perField.consumer);
+        if (nextTermsHash != null)
+          nextChildFields.add(perField.nextPerField);
+      }
+
+      childThreadsAndFields.put(perThread.consumer, childFields);
+      if (nextTermsHash != null)
+        nextThreadsAndFields.put(perThread.nextPerThread, nextChildFields);
+    }
+    
+    consumer.flush(childThreadsAndFields, state);
+
+    shrinkFreePostings(threadsAndFields, state);
+    
+    if (nextTermsHash != null)
+      nextTermsHash.flush(nextThreadsAndFields, state);
+  }
+
+  /** Releases up to postingsFreeChunk recycled postings (and
+   *  recurses into the chained TermsHash) so the GC can reclaim
+   *  them; returns true if anything was freed. */
+  synchronized public boolean freeRAM() {
+
+    if (!trackAllocations)
+      return false;
+
+    boolean any;
+    final int numToFree;
+    if (postingsFreeCount >= postingsFreeChunk)
+      numToFree = postingsFreeChunk;
+    else
+      numToFree = postingsFreeCount;
+    any = numToFree > 0;
+    if (any) {
+      Arrays.fill(postingsFreeList, postingsFreeCount-numToFree, postingsFreeCount, null);
+      postingsFreeCount -= numToFree;
+      postingsAllocCount -= numToFree;
+      docWriter.bytesAllocated(-numToFree * bytesPerPosting);
+      // (redundant: any is already true inside this branch)
+      any = true;
+    }
+
+    if (nextTermsHash != null)
+      any |= nextTermsHash.freeRAM();
+
+    return any;
+  }
+
+  // USE ONLY FOR DEBUGGING!
+  /*
+    public String getPostingText() {
+    char[] text = charPool.buffers[p.textStart >> CHAR_BLOCK_SHIFT];
+    int upto = p.textStart & CHAR_BLOCK_MASK;
+    while(text[upto] != 0xffff)
+    upto++;
+    return new String(text, p.textStart, upto-(p.textStart & BYTE_BLOCK_MASK));
+    }
+  */
+
+  synchronized public void recyclePostings(final RawPostingList[] postings, final int numPostings) {
+
+    assert postings.length >= numPostings;
+
+    // Move all Postings from this ThreadState back to our
+    // free list.  We pre-allocated this array while we were
+    // creating Postings to make sure it's large enough
+    assert postingsFreeCount + numPostings <= postingsFreeList.length;
+    System.arraycopy(postings, 0, postingsFreeList, postingsFreeCount, numPostings);
+    postingsFreeCount += numPostings;
+  }
+
+  /** Fills postings[] entirely: first from the free list, then by
+   *  asking the consumer to allocate the remainder; updates RAM
+   *  accounting when trackAllocations is set. */
+  synchronized public void getPostings(final RawPostingList[] postings) {
+
+    assert docWriter.writer.testPoint("TermsHash.getPostings start");
+
+    assert postingsFreeCount <= postingsFreeList.length;
+    assert postingsFreeCount <= postingsAllocCount: "postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount;
+
+    final int numToCopy;
+    if (postingsFreeCount < postings.length)
+      numToCopy = postingsFreeCount;
+    else
+      numToCopy = postings.length;
+    final int start = postingsFreeCount-numToCopy;
+    assert start >= 0;
+    assert start + numToCopy <= postingsFreeList.length;
+    assert numToCopy <= postings.length;
+    System.arraycopy(postingsFreeList, start,
+                     postings, 0, numToCopy);
+
+    // Directly allocate the remainder if any
+    if (numToCopy < postings.length) {
+      final int extra = postings.length - numToCopy;
+      final int newPostingsAllocCount = postingsAllocCount + extra;
+
+      // Pre-grow the free list so recyclePostings can always take
+      // every outstanding posting back:
+      if (newPostingsAllocCount > postingsFreeList.length)
+        postingsFreeList = new RawPostingList[ArrayUtil.getNextSize(newPostingsAllocCount)];
+
+      consumer.createPostings(postings, numToCopy, extra);
+      assert docWriter.writer.testPoint("TermsHash.getPostings after create");
+      postingsAllocCount += extra;
+
+      if (trackAllocations)
+        docWriter.bytesAllocated(extra * bytesPerPosting);
+    }
+
+    postingsFreeCount -= numToCopy;
+
+    if (trackAllocations)
+      docWriter.bytesUsed(postings.length * bytesPerPosting);
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/TermsHash.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumer.java?rev=677865&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumer.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumer.java Fri Jul 18 02:20:12 2008
@@ -0,0 +1,38 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Map;
+
+/** Consumer plugged into the TermsHash processor.  Supplies
+ *  concrete RawPostingList instances via createPostings and
+ *  writes out the accumulated postings when flush /
+ *  closeDocStore are called. */
+abstract class TermsHashConsumer {
+  // Size in bytes of one posting, used by TermsHash for memory
+  // accounting (bytesAllocated / bytesUsed).
+  abstract int bytesPerPosting();
+  // Allocate count new postings into postings[start..start+count).
+  abstract void createPostings(RawPostingList[] postings, int start, int count);
+  // Create the per-thread view of this consumer.
+  abstract TermsHashConsumerPerThread addThread(TermsHashPerThread perThread);
+  abstract void flush(Map threadsAndFields, final DocumentsWriter.FlushState state) throws IOException;
+  abstract void abort();
+  abstract void closeDocStore(DocumentsWriter.FlushState state) throws IOException;
+
+  // Number of parallel byte streams written per unique term
+  // (presumably mirrored into TermsHash.streamCount, which
+  // TermsHashPerField reads — confirm in TermsHash).
+  int streamCount;
+
+  FieldInfos fieldInfos;
+
+  void setFieldInfos(FieldInfos fieldInfos) {
+    this.fieldInfos = fieldInfos;
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumerPerField.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumerPerField.java?rev=677865&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumerPerField.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumerPerField.java Fri Jul 18 02:20:12 2008
@@ -0,0 +1,35 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.analysis.Token;
+
+/** Implement this class to plug into the TermsHash
+ *  processor, which inverts & stores Tokens into a hash
+ *  table and provides an API for writing bytes into
+ *  multiple streams for each unique Token. */
+abstract class TermsHashConsumerPerField {
+  // Begin a new field instance; the return value is OR'ed with
+  // the chained field's (see TermsHashPerField.start) to decide
+  // whether this field's tokens are processed at all.
+  abstract boolean start(Fieldable[] fields, int count) throws IOException;
+  // Called once all tokens of the current field have been added.
+  abstract void finish() throws IOException;
+  // Notification that a term too large for the char pool was
+  // skipped (see TermsHashPerField.add).
+  abstract void skippingLongTerm(Token t) throws IOException;
+  // First occurrence of this term since the hash was last flushed.
+  abstract void newTerm(Token t, RawPostingList p) throws IOException;
+  // Subsequent occurrence of a term already present in the hash.
+  abstract void addTerm(Token t, RawPostingList p) throws IOException;
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumerPerField.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumerPerThread.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumerPerThread.java?rev=677865&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumerPerThread.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumerPerThread.java Fri Jul 18 02:20:12 2008
@@ -0,0 +1,27 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+/** Per-thread view of a TermsHashConsumer, created via
+ *  TermsHashConsumer.addThread. */
+abstract class TermsHashConsumerPerThread {
+  // Called before each document is inverted.
+  abstract void startDocument() throws IOException;
+  // Called after each document; may return a DocWriter holding
+  // deferred per-document output, or null (see
+  // TermsHashPerThread.finishDocument, which chains the results).
+  abstract DocumentsWriter.DocWriter finishDocument() throws IOException;
+  // Create the per-field processor for fieldInfo.
+  abstract public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo);
+  // Discard all buffered state.
+  abstract public void abort();
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumerPerThread.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashPerField.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashPerField.java?rev=677865&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashPerField.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashPerField.java Fri Jul 18 02:20:12 2008
@@ -0,0 +1,545 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.util.UnicodeUtil;
+
+/** Inverts tokens for one field in one thread.  Each unique
+ *  term's text is interned into the per-thread char pool
+ *  (0xffff-terminated) and tracked in an open-addressed hash of
+ *  RawPostingList entries; per-term data is appended to
+ *  streamCount parallel byte-slice streams in the byte pool.
+ *  Term events are forwarded to the TermsHashConsumerPerField
+ *  and, when doNextCall is set, to the chained nextPerField. */
+final class TermsHashPerField extends InvertedDocConsumerPerField {
+
+  final TermsHashConsumerPerField consumer;
+  final TermsHashPerField nextPerField;
+  final TermsHashPerThread perThread;
+  final DocumentsWriter.DocState docState;
+  final DocInverter.FieldInvertState fieldState;
+
+  // Copied from our perThread
+  final CharBlockPool charPool;
+  final IntBlockPool intPool;
+  final ByteBlockPool bytePool;
+
+  // streamCount parallel byte streams per term; numPostingInt
+  // (2*streamCount) bounds the pool space checked before a new
+  // term's slices are started.
+  final int streamCount;
+  final int numPostingInt;
+
+  final FieldInfo fieldInfo;
+
+  // True after compactPostings() has packed all entries to the
+  // front of postingsHash; cleared by reset().
+  boolean postingsCompacted;
+  int numPostings;
+  // Open-addressed hash table; the size is always a power of 2
+  // so postingsHashMask can replace a modulo.
+  private int postingsHashSize = 4;
+  private int postingsHashHalfSize = postingsHashSize/2;
+  private int postingsHashMask = postingsHashSize-1;
+  private RawPostingList[] postingsHash = new RawPostingList[postingsHashSize];
+  // Posting located by the most recent hash probe in add()
+  private RawPostingList p;
+
+  // Builds the per-field chain: our consumer comes from the
+  // per-thread consumer; if a next TermsHash exists, create its
+  // per-field for the same field too.
+  public TermsHashPerField(DocInverterPerField docInverterPerField, final TermsHashPerThread perThread, final TermsHashPerThread nextPerThread, final FieldInfo fieldInfo) {
+    this.perThread = perThread;
+    intPool = perThread.intPool;
+    charPool = perThread.charPool;
+    bytePool = perThread.bytePool;
+    docState = perThread.docState;
+    fieldState = docInverterPerField.fieldState;
+    streamCount = perThread.termsHash.streamCount;
+    numPostingInt = 2*streamCount;
+    this.consumer = perThread.consumer.addField(this, fieldInfo);
+    this.fieldInfo = fieldInfo;
+    if (nextPerThread != null)
+      nextPerField = (TermsHashPerField) nextPerThread.addField(docInverterPerField, fieldInfo);
+    else
+      nextPerField = null;
+  }
+
+  /** Shrink the hash table toward targetSize (power-of-2 sizes
+   *  only) to give memory back after a large segment. */
+  void shrinkHash(int targetSize) {
+    assert postingsCompacted || numPostings == 0;
+
+    // Cannot use ArrayUtil.shrink because we require power
+    // of 2:
+    int newSize = postingsHash.length;
+    while(newSize >= 8 && newSize/4 > targetSize) {
+      newSize /= 2;
+    }
+
+    if (newSize != postingsHash.length) {
+      postingsHash = new RawPostingList[newSize];
+      postingsHashSize = newSize;
+      postingsHashHalfSize = newSize/2;
+      postingsHashMask = newSize-1;
+    }
+  }
+
+  /** Recycle all postings back to TermsHash and clear the hash;
+   *  cascades down the chain via nextPerField. */
+  public void reset() {
+    if (!postingsCompacted)
+      compactPostings();
+    assert numPostings <= postingsHash.length;
+    if (numPostings > 0) {
+      perThread.termsHash.recyclePostings(postingsHash, numPostings);
+      Arrays.fill(postingsHash, 0, numPostings, null);
+      numPostings = 0;
+    }
+    postingsCompacted = false;
+    if (nextPerField != null)
+      nextPerField.reset();
+  }
+
+  // NOTE(review): reset() already cascades down the chain, so
+  // nextPerField.abort() resets the chained field twice
+  // (harmless, since reset() is idempotent on an empty hash).
+  synchronized public void abort() {
+    reset();
+    if (nextPerField != null)
+      nextPerField.abort();
+  }
+
+  /** Position reader on the given stream of posting p: the slice
+   *  starts at p.byteStart plus the stream's first-level offset,
+   *  and the end address is read from the int-pool entry that
+   *  tracks that stream's write pointer. */
+  public void initReader(ByteSliceReader reader, RawPostingList p, int stream) {
+    assert stream < streamCount;
+    final int[] ints = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
+    final int upto = p.intStart & DocumentsWriter.INT_BLOCK_MASK;
+    reader.init(bytePool,
+                p.byteStart+stream*ByteBlockPool.FIRST_LEVEL_SIZE,
+                ints[upto+stream]);
+  }
+
+  // Pack all non-null entries to the front of postingsHash,
+  // preserving their relative order; sets postingsCompacted.
+  private synchronized void compactPostings() {
+    int upto = 0;
+    for(int i=0;i<postingsHashSize;i++) {
+      if (postingsHash[i] != null) {
+        if (upto < i) {
+          postingsHash[upto] = postingsHash[i];
+          postingsHash[i] = null;
+        }
+        upto++;
+      }
+    }
+
+    assert upto == numPostings;
+    postingsCompacted = true;
+  }
+
+  /** Collapse the hash table & sort in-place. */
+  public RawPostingList[] sortPostings() {
+    compactPostings();
+    quickSort(postingsHash, 0, numPostings-1);
+    return postingsHash;
+  }
+
+  // In-place quicksort with median-of-three pivot selection,
+  // ordering by comparePostings (i.e. by term text).
+  void quickSort(RawPostingList[] postings, int lo, int hi) {
+    if (lo >= hi)
+      return;
+    else if (hi == 1+lo) {
+      // Two elements: swap if out of order
+      if (comparePostings(postings[lo], postings[hi]) > 0) {
+        final RawPostingList tmp = postings[lo];
+        postings[lo] = postings[hi];
+        postings[hi] = tmp;
+      }
+      return;
+    }
+
+    int mid = (lo + hi) >>> 1;
+
+    // Order lo/mid/hi so that postings[mid] is the median
+    if (comparePostings(postings[lo], postings[mid]) > 0) {
+      RawPostingList tmp = postings[lo];
+      postings[lo] = postings[mid];
+      postings[mid] = tmp;
+    }
+
+    if (comparePostings(postings[mid], postings[hi]) > 0) {
+      RawPostingList tmp = postings[mid];
+      postings[mid] = postings[hi];
+      postings[hi] = tmp;
+
+      if (comparePostings(postings[lo], postings[mid]) > 0) {
+        RawPostingList tmp2 = postings[lo];
+        postings[lo] = postings[mid];
+        postings[mid] = tmp2;
+      }
+    }
+
+    int left = lo + 1;
+    int right = hi - 1;
+
+    if (left >= right)
+      return;
+
+    RawPostingList partition = postings[mid];
+
+    // Partition around the pivot, then recurse on each side
+    for (; ;) {
+      while (comparePostings(postings[right], partition) > 0)
+        --right;
+
+      while (left < right && comparePostings(postings[left], partition) <= 0)
+        ++left;
+
+      if (left < right) {
+        RawPostingList tmp = postings[left];
+        postings[left] = postings[right];
+        postings[right] = tmp;
+        --right;
+      } else {
+        break;
+      }
+    }
+
+    quickSort(postings, lo, left);
+    quickSort(postings, left + 1, hi);
+  }
+
+  /** Compares term text for two Posting instance and
+   *  returns -1 if p1 < p2; 1 if p1 > p2; else 0.
+   *  Term text in the char pool is 0xffff-terminated (see
+   *  add()), so a term that is a prefix of the other sorts
+   *  first. */
+  int comparePostings(RawPostingList p1, RawPostingList p2) {
+
+    if (p1 == p2)
+      return 0;
+
+    final char[] text1 = charPool.buffers[p1.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
+    int pos1 = p1.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
+    final char[] text2 = charPool.buffers[p2.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
+    int pos2 = p2.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
+
+    assert text1 != text2 || pos1 != pos2;
+
+    while(true) {
+      final char c1 = text1[pos1++];
+      final char c2 = text2[pos2++];
+      if (c1 != c2) {
+        if (0xffff == c2)
+          return 1;
+        else if (0xffff == c1)
+          return -1;
+        else
+          return c1-c2;
+      } else
+        // This method should never compare equal postings
+        // unless p1==p2
+        assert c1 != 0xffff;
+    }
+  }
+
+  /** Test whether the text for current RawPostingList p equals
+   *  current tokenText. */
+  private boolean postingEquals(final char[] tokenText, final int tokenTextLen) {
+
+    final char[] text = perThread.charPool.buffers[p.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
+    assert text != null;
+    int pos = p.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
+
+    int tokenPos = 0;
+    for(;tokenPos<tokenTextLen;pos++,tokenPos++)
+      if (tokenText[tokenPos] != text[pos])
+        return false;
+    // Equal only if the interned term also ends here (0xffff
+    // terminator), i.e. same length
+    return 0xffff == text[pos];
+  }
+  
+  private boolean doCall;
+  private boolean doNextCall;
+
+  // Returns true if either our consumer or the chained field
+  // wants these field instances processed.
+  boolean start(Fieldable[] fields, int count) throws IOException {
+    doCall = consumer.start(fields, count);
+    if (nextPerField != null)
+      doNextCall = nextPerField.start(fields, count);
+    return doCall || doNextCall;
+  }
+
+  // Secondary entry point (for 2nd & subsequent TermsHash),
+  // because token text has already been "interned" into
+  // textStart, so we hash by textStart
+  public void add(Token token, int textStart) throws IOException {
+
+    int code = textStart;
+
+    int hashPos = code & postingsHashMask;
+
+    assert !postingsCompacted;
+
+    // Locate RawPostingList in hash
+    p = postingsHash[hashPos];
+
+    if (p != null && p.textStart != textStart) {
+      // Conflict: keep searching different locations in
+      // the hash table.  The probe step is forced odd so it
+      // is coprime with the power-of-2 table size and will
+      // visit every slot.
+      final int inc = ((code>>8)+code)|1;
+      do {
+        code += inc;
+        hashPos = code & postingsHashMask;
+        p = postingsHash[hashPos];
+      } while (p != null && p.textStart != textStart);
+    }
+
+    if (p == null) {
+
+      // First time we are seeing this token since we last
+      // flushed the hash.
+
+      // Refill?
+      if (0 == perThread.freePostingsCount)
+        perThread.morePostings();
+
+      // Pull next free RawPostingList from free list
+      p = perThread.freePostings[--perThread.freePostingsCount];
+      assert p != null;
+
+      p.textStart = textStart;
+          
+      assert postingsHash[hashPos] == null;
+      postingsHash[hashPos] = p;
+      numPostings++;
+
+      // Grow at 50% load
+      if (numPostings == postingsHashHalfSize)
+        rehashPostings(2*postingsHashSize);
+
+      // Init stream slices: one first-level byte slice per
+      // stream, with each stream's write pointer kept in the
+      // int pool
+      if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE)
+        intPool.nextBuffer();
+
+      if (DocumentsWriter.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE)
+        bytePool.nextBuffer();
+
+      intUptos = intPool.buffer;
+      intUptoStart = intPool.intUpto;
+      intPool.intUpto += streamCount;
+
+      p.intStart = intUptoStart + intPool.intOffset;
+
+      for(int i=0;i<streamCount;i++) {
+        final int upto = bytePool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
+        intUptos[intUptoStart+i] = upto + bytePool.byteOffset;
+      }
+      p.byteStart = intUptos[intUptoStart];
+
+      consumer.newTerm(token, p);
+
+    } else {
+      // Seen before: restore the stream write pointers for this
+      // posting
+      intUptos = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
+      intUptoStart = p.intStart & DocumentsWriter.INT_BLOCK_MASK;
+      consumer.addTerm(token, p);
+    }
+  }
+
+  // Primary entry point (for first TermsHash)
+  void add(Token token) throws IOException {
+
+    assert !postingsCompacted;
+
+    // We are first in the chain so we must "intern" the
+    // term text into textStart address
+
+    // Get the text of this term.
+    final char[] tokenText = token.termBuffer();
+    final int tokenTextLen = token.termLength();
+
+    // Compute hashcode & replace any invalid UTF16 sequences
+    // (the hash is accumulated from the end of the term back
+    // to the start)
+    int downto = tokenTextLen;
+    int code = 0;
+    while (downto > 0) {
+      char ch = tokenText[--downto];
+
+      if (ch >= UnicodeUtil.UNI_SUR_LOW_START && ch <= UnicodeUtil.UNI_SUR_LOW_END) {
+        if (0 == downto) {
+          // Unpaired
+          ch = tokenText[downto] = UnicodeUtil.UNI_REPLACEMENT_CHAR;
+        } else {
+          final char ch2 = tokenText[downto-1];
+          if (ch2 >= UnicodeUtil.UNI_SUR_HIGH_START && ch2 <= UnicodeUtil.UNI_SUR_HIGH_END) {
+            // OK: high followed by low.  This is a valid
+            // surrogate pair.
+            code = ((code*31) + ch)*31+ch2;
+            downto--;
+            continue;
+          } else {
+            // Unpaired
+            ch = tokenText[downto] = UnicodeUtil.UNI_REPLACEMENT_CHAR;
+          }            
+        }
+      } else if (ch >= UnicodeUtil.UNI_SUR_HIGH_START && ch <= UnicodeUtil.UNI_SUR_HIGH_END)
+        // Unpaired
+        ch = tokenText[downto] = UnicodeUtil.UNI_REPLACEMENT_CHAR;
+
+      code = (code*31) + ch;
+    }
+
+    int hashPos = code & postingsHashMask;
+
+    // Locate RawPostingList in hash
+    p = postingsHash[hashPos];
+
+    if (p != null && !postingEquals(tokenText, tokenTextLen)) {
+      // Conflict: keep searching different locations in
+      // the hash table.  The probe step is forced odd so it
+      // is coprime with the power-of-2 table size and will
+      // visit every slot.
+      final int inc = ((code>>8)+code)|1;
+      do {
+        code += inc;
+        hashPos = code & postingsHashMask;
+        p = postingsHash[hashPos];
+      } while (p != null && !postingEquals(tokenText, tokenTextLen));
+    }
+
+    if (p == null) {
+
+      // First time we are seeing this token since we last
+      // flushed the hash.
+      final int textLen1 = 1+tokenTextLen;
+      if (textLen1 + charPool.charUpto > DocumentsWriter.CHAR_BLOCK_SIZE) {
+        if (textLen1 > DocumentsWriter.CHAR_BLOCK_SIZE) {
+          // Just skip this term, to remain as robust as
+          // possible during indexing.  A TokenFilter
+          // can be inserted into the analyzer chain if
+          // other behavior is wanted (pruning the term
+          // to a prefix, throwing an exception, etc).
+
+          if (docState.maxTermPrefix == null)
+            docState.maxTermPrefix = new String(tokenText, 0, 30);
+
+          consumer.skippingLongTerm(token);
+          return;
+        }
+        charPool.nextBuffer();
+      }
+
+      // Refill?
+      if (0 == perThread.freePostingsCount)
+        perThread.morePostings();
+
+      // Pull next free RawPostingList from free list
+      p = perThread.freePostings[--perThread.freePostingsCount];
+      assert p != null;
+
+      // Intern term text into the char pool, 0xffff-terminated
+      final char[] text = charPool.buffer;
+      final int textUpto = charPool.charUpto;
+      p.textStart = textUpto + charPool.charOffset;
+      charPool.charUpto += textLen1;
+      System.arraycopy(tokenText, 0, text, textUpto, tokenTextLen);
+      text[textUpto+tokenTextLen] = 0xffff;
+          
+      assert postingsHash[hashPos] == null;
+      postingsHash[hashPos] = p;
+      numPostings++;
+
+      // Grow at 50% load
+      if (numPostings == postingsHashHalfSize)
+        rehashPostings(2*postingsHashSize);
+
+      // Init stream slices: one first-level byte slice per
+      // stream, with each stream's write pointer kept in the
+      // int pool
+      if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE)
+        intPool.nextBuffer();
+
+      if (DocumentsWriter.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE)
+        bytePool.nextBuffer();
+
+      intUptos = intPool.buffer;
+      intUptoStart = intPool.intUpto;
+      intPool.intUpto += streamCount;
+
+      p.intStart = intUptoStart + intPool.intOffset;
+
+      for(int i=0;i<streamCount;i++) {
+        final int upto = bytePool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
+        intUptos[intUptoStart+i] = upto + bytePool.byteOffset;
+      }
+      p.byteStart = intUptos[intUptoStart];
+
+      consumer.newTerm(token, p);
+
+    } else {
+      // Seen before: restore the stream write pointers for this
+      // posting
+      intUptos = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
+      intUptoStart = p.intStart & DocumentsWriter.INT_BLOCK_MASK;
+      consumer.addTerm(token, p);
+    }
+
+    // Forward the interned textStart down the chain
+    if (doNextCall)
+      nextPerField.add(token, p.textStart);
+  }
+
+  // Write pointers for the current posting's streams (set by
+  // add(), used by writeByte)
+  int[] intUptos;
+  int intUptoStart;
+
+  /** Append one byte to the given stream, allocating the next
+   *  byte slice when the current slice's end marker (a non-zero
+   *  byte) is reached. */
+  void writeByte(int stream, byte b) {
+    int upto = intUptos[intUptoStart+stream];
+    byte[] bytes = bytePool.buffers[upto >> DocumentsWriter.BYTE_BLOCK_SHIFT];
+    assert bytes != null;
+    int offset = upto & DocumentsWriter.BYTE_BLOCK_MASK;
+    if (bytes[offset] != 0) {
+      // End of slice; allocate a new one
+      offset = bytePool.allocSlice(bytes, offset);
+      bytes = bytePool.buffer;
+      intUptos[intUptoStart+stream] = offset + bytePool.byteOffset;
+    }
+    bytes[offset] = b;
+    (intUptos[intUptoStart+stream])++;
+  }
+
+  public void writeBytes(int stream, byte[] b, int offset, int len) {
+    // TODO: optimize
+    final int end = offset + len;
+    for(int i=offset;i<end;i++)
+      writeByte(stream, b[i]);
+  }
+
+  /** Append i to the stream in VInt format: 7 bits per byte, low
+   *  bits first, high bit set on all but the last byte. */
+  void writeVInt(int stream, int i) {
+    while ((i & ~0x7F) != 0) {
+      writeByte(stream, (byte)((i & 0x7f) | 0x80));
+      i >>>= 7;
+    }
+    writeByte(stream, (byte) i);
+  }
+
+  // End of field: notify our consumer, then the chained field.
+  void finish() throws IOException {
+    consumer.finish();
+    if (nextPerField != null)
+      nextPerField.finish();
+  }
+
+  /** Called when postings hash is too small (> 50%
+   *  occupied) or too large (< 20% occupied). */
+  void rehashPostings(final int newSize) {
+
+    final int newMask = newSize-1;
+
+    RawPostingList[] newHash = new RawPostingList[newSize];
+    for(int i=0;i<postingsHashSize;i++) {
+      RawPostingList p0 = postingsHash[i];
+      if (p0 != null) {
+        int code;
+        // Primary hashes by term text (walk to the 0xffff
+        // terminator, then accumulate back-to-front, matching
+        // add(Token)); chained TermsHash hashes by the interned
+        // textStart address
+        if (perThread.primary) {
+          final int start = p0.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
+          final char[] text = charPool.buffers[p0.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
+          int pos = start;
+          while(text[pos] != 0xffff)
+            pos++;
+          code = 0;
+          while (pos > start)
+            code = (code*31) + text[--pos];
+        } else
+          code = p0.textStart;
+
+        int hashPos = code & newMask;
+        assert hashPos >= 0;
+        if (newHash[hashPos] != null) {
+          final int inc = ((code>>8)+code)|1;
+          do {
+            code += inc;
+            hashPos = code & newMask;
+          } while (newHash[hashPos] != null);
+        }
+        newHash[hashPos] = p0;
+      }
+    }
+
+    postingsHashMask = newMask;
+    postingsHash = newHash;
+    postingsHashSize = newSize;
+    postingsHashHalfSize = newSize >> 1;
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashPerField.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashPerThread.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashPerThread.java?rev=677865&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashPerThread.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashPerThread.java Fri Jul 18 02:20:12 2008
@@ -0,0 +1,116 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+/** Per-thread state for TermsHash: owns the int and byte pools
+ *  (and, if primary, the char pool), keeps a local cache of free
+ *  RawPostingLists, and chains to the next TermsHash's
+ *  per-thread instance when one exists. */
+final class TermsHashPerThread extends InvertedDocConsumerPerThread {
+
+  final TermsHash termsHash;
+  final TermsHashConsumerPerThread consumer;
+  final TermsHashPerThread nextPerThread;
+
+  final CharBlockPool charPool;
+  final IntBlockPool intPool;
+  final ByteBlockPool bytePool;
+  // True when this is the first TermsHash in the chain: the
+  // primary allocates the char pool; secondaries share it.
+  final boolean primary;
+  final DocumentsWriter.DocState docState;
+
+  // Local free list, refilled in batches of 256 from TermsHash
+  // (see morePostings)
+  final RawPostingList freePostings[] = new RawPostingList[256];
+  int freePostingsCount;
+
+  public TermsHashPerThread(DocInverterPerThread docInverterPerThread, final TermsHash termsHash, final TermsHash nextTermsHash, final TermsHashPerThread primaryPerThread) {
+    docState = docInverterPerThread.docState;
+
+    this.termsHash = termsHash;
+    this.consumer = termsHash.consumer.addThread(this);
+
+    if (nextTermsHash != null) {
+      // We are primary
+      charPool = new CharBlockPool(termsHash.docWriter);
+      primary = true;
+    } else {
+      charPool = primaryPerThread.charPool;
+      primary = false;
+    }
+
+    intPool = new IntBlockPool(termsHash.docWriter, termsHash.trackAllocations);
+    bytePool = new ByteBlockPool(termsHash.docWriter.byteBlockAllocator, termsHash.trackAllocations);
+
+    if (nextTermsHash != null)
+      nextPerThread = nextTermsHash.addThread(docInverterPerThread, this);
+    else
+      nextPerThread = null;
+  }
+
+  // Create the per-field processor; it chains itself to
+  // nextPerThread's per-field for the same field.
+  InvertedDocConsumerPerField addField(DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) {
+    return new TermsHashPerField(docInverterPerField, this, nextPerThread, fieldInfo);
+  }
+
+  synchronized public void abort() {
+    reset(true);
+    consumer.abort();
+    if (nextPerThread != null)
+      nextPerThread.abort();
+  }
+
+  // perField calls this when it needs more postings:
+  void morePostings() throws IOException {
+    assert freePostingsCount == 0;
+    termsHash.getPostings(freePostings);
+    freePostingsCount = freePostings.length;
+    for(int i=0;i<freePostingsCount;i++)
+      assert freePostings[i] != null;
+  }
+
+  public void startDocument() throws IOException {
+    consumer.startDocument();
+    if (nextPerThread != null)
+      nextPerThread.consumer.startDocument();
+  }
+
+  // Collect this consumer's DocWriter and the chained one; link
+  // them (setNext) so both get written, tolerating null from
+  // either side.
+  public DocumentsWriter.DocWriter finishDocument() throws IOException {
+    final DocumentsWriter.DocWriter doc = consumer.finishDocument();
+
+    final DocumentsWriter.DocWriter doc2;
+    if (nextPerThread != null)
+      doc2 = nextPerThread.consumer.finishDocument();
+    else
+      doc2 = null;
+    if (doc == null)
+      return doc2;
+    else {
+      doc.setNext(doc2);
+      return doc;
+    }
+  }
+
+  // Clear all state
+  void reset(boolean recyclePostings) {
+    intPool.reset();
+    bytePool.reset();
+
+    // Only the primary owns (and so resets) the char pool
+    if (primary)
+      charPool.reset();
+
+    if (recyclePostings) {
+      termsHash.recyclePostings(freePostings, freePostingsCount);
+      freePostingsCount = 0;
+    }
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashPerThread.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message