lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r1534320 [12/39] - in /lucene/dev/branches/lucene4956: ./ dev-tools/ dev-tools/idea/.idea/ dev-tools/idea/lucene/expressions/ dev-tools/idea/solr/contrib/velocity/ dev-tools/maven/ dev-tools/maven/lucene/ dev-tools/maven/lucene/expressions/...
Date Mon, 21 Oct 2013 18:58:44 GMT
Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java
(original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java
Mon Oct 21 18:58:24 2013
@@ -223,11 +223,30 @@ public class FieldInfos implements Itera
         (dvType == null || docValuesType.get(name) == null || dvType == docValuesType.get(name));
     }
 
+    /**
+     * Returns true if the {@code fieldName} exists in the map and is of the
+     * same {@code dvType}.
+     */
+    synchronized boolean contains(String fieldName, DocValuesType dvType) {
+      // used by IndexWriter.updateNumericDocValue
+      if (!nameToNumber.containsKey(fieldName)) {
+        return false;
+      } else {
+        // only return true if the field has the same dvType as the requested one
+        return dvType == docValuesType.get(fieldName);
+      }
+    }
+    
     synchronized void clear() {
       numberToName.clear();
       nameToNumber.clear();
       docValuesType.clear();
     }
+
+    synchronized void setDocValuesType(int number, String name, DocValuesType dvType) {
+      assert containsConsistent(number, name, dvType);
+      docValuesType.put(name, dvType);
+    }
   }
   
   static final class Builder {
@@ -287,7 +306,14 @@ public class FieldInfos implements Itera
         fi.update(isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions);
 
         if (docValues != null) {
-          fi.setDocValuesType(docValues);
+          // only pay the synchronization cost if fi does not already have a DVType
+          boolean updateGlobal = !fi.hasDocValues();
+          fi.setDocValuesType(docValues); // this will also perform the consistency check.
+          if (updateGlobal) {
+            // must also update docValuesType map so it's
+            // aware of this field's DocValuesType 
+            globalFieldNumbers.setDocValuesType(fi.number, name, docValues);
+          }
         }
 
         if (!fi.omitsNorms() && normType != null) {

Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
(original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Mon Oct 21 18:58:24 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
-import java.util.Comparator;
 import java.util.Iterator;
 
 import org.apache.lucene.search.CachingWrapperFilter;
@@ -98,11 +97,6 @@ public class FilterAtomicReader extends 
     public TermsEnum iterator(TermsEnum reuse) throws IOException {
       return in.iterator(reuse);
     }
-    
-    @Override
-    public Comparator<BytesRef> getComparator() {
-      return in.getComparator();
-    }
 
     @Override
     public long size() throws IOException {
@@ -125,6 +119,11 @@ public class FilterAtomicReader extends 
     }
 
     @Override
+    public boolean hasFreqs() {
+      return in.hasFreqs();
+    }
+
+    @Override
     public boolean hasOffsets() {
       return in.hasOffsets();
     }
@@ -200,11 +199,6 @@ public class FilterAtomicReader extends 
     public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse,
int flags) throws IOException {
       return in.docsAndPositions(liveDocs, reuse, flags);
     }
-
-    @Override
-    public Comparator<BytesRef> getComparator() {
-      return in.getComparator();
-    }
   }
 
   /** Base class for filtering {@link DocsEnum} implementations. */
@@ -414,4 +408,10 @@ public class FilterAtomicReader extends 
     return in.getNormValues(field);
   }
 
+  @Override
+  public Bits getDocsWithField(String field) throws IOException {
+    ensureOpen();
+    return in.getDocsWithField(field);
+  }
+
 }

Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
(original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
Mon Oct 21 18:58:24 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
-import java.util.Comparator;
 
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.AttributeSource;
@@ -28,7 +27,7 @@ import org.apache.lucene.util.Bits;
  * Abstract class for enumerating a subset of all terms. 
  * 
  * <p>Term enumerations are always ordered by
- * {@link #getComparator}.  Each term in the enumeration is
+ * {@link BytesRef#compareTo}.  Each term in the enumeration is
  * greater than all that precede it.</p>
  * <p><em>Please note:</em> Consumers of this enum cannot
  * call {@code seek()}, it is forward only; it throws
@@ -135,11 +134,6 @@ public abstract class FilteredTermsEnum 
   }
 
   @Override
-  public Comparator<BytesRef> getComparator() {
-    return tenum.getComparator();
-  }
-    
-  @Override
   public int docFreq() throws IOException {
     return tenum.docFreq();
   }
@@ -221,7 +215,7 @@ public abstract class FilteredTermsEnum 
         final BytesRef t = nextSeekTerm(actualTerm);
         //System.out.println("  seek to t=" + (t == null ? "null" : t.utf8ToString()) + "
tenum=" + tenum);
         // Make sure we always seek forward:
-        assert actualTerm == null || t == null || getComparator().compare(t, actualTerm)
> 0: "curTerm=" + actualTerm + " seekTerm=" + t;
+        assert actualTerm == null || t == null || t.compareTo(actualTerm) > 0: "curTerm="
+ actualTerm + " seekTerm=" + t;
         if (t == null || tenum.seekCeil(t) == SeekStatus.END) {
           // no more terms to seek to or enum exhausted
           //System.out.println("  return null");

Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java
(original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java
Mon Oct 21 18:58:24 2013
@@ -68,12 +68,11 @@ class FlushByRamOrCountsPolicy extends F
         control.setApplyAllDeletes();
       }
     }
-    final DocumentsWriter writer = this.writer.get();
     if ((flushOnRAM() &&
         control.getDeleteBytesUsed() > (1024*1024*indexWriterConfig.getRAMBufferSizeMB())))
{
       control.setApplyAllDeletes();
-     if (writer.infoStream.isEnabled("FP")) {
-       writer.infoStream.message("FP", "force apply deletes bytesUsed=" + control.getDeleteBytesUsed()
+ " vs ramBuffer=" + (1024*1024*indexWriterConfig.getRAMBufferSizeMB()));
+     if (infoStream.isEnabled("FP")) {
+       infoStream.message("FP", "force apply deletes bytesUsed=" + control.getDeleteBytesUsed()
+ " vs ramBuffer=" + (1024*1024*indexWriterConfig.getRAMBufferSizeMB()));
      }
    }
   }
@@ -89,9 +88,8 @@ class FlushByRamOrCountsPolicy extends F
       final long limit = (long) (indexWriterConfig.getRAMBufferSizeMB() * 1024.d * 1024.d);
       final long totalRam = control.activeBytes() + control.getDeleteBytesUsed();
       if (totalRam >= limit) {
-        final DocumentsWriter writer = this.writer.get();
-        if (writer.infoStream.isEnabled("FP")) {
-          writer.infoStream.message("FP", "flush: activeBytes=" + control.activeBytes() +
" deleteBytes=" + control.getDeleteBytesUsed() + " vs limit=" + limit);
+        if (infoStream.isEnabled("FP")) {
+          infoStream.message("FP", "flush: activeBytes=" + control.activeBytes() + " deleteBytes="
+ control.getDeleteBytesUsed() + " vs limit=" + limit);
         }
         markLargestWriterPending(control, state, totalRam);
       }

Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FlushPolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FlushPolicy.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FlushPolicy.java
(original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FlushPolicy.java
Mon Oct 21 18:58:24 2013
@@ -20,6 +20,7 @@ import java.util.Iterator;
 
 import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.InfoStream;
 import org.apache.lucene.util.SetOnce;
 
 /**
@@ -52,8 +53,8 @@ import org.apache.lucene.util.SetOnce;
  * @see IndexWriterConfig#setFlushPolicy(FlushPolicy)
  */
 abstract class FlushPolicy implements Cloneable {
-  protected SetOnce<DocumentsWriter> writer = new SetOnce<DocumentsWriter>();
   protected LiveIndexWriterConfig indexWriterConfig;
+  protected InfoStream infoStream;
 
   /**
    * Called for each delete term. If this is a delete triggered due to an update
@@ -93,9 +94,9 @@ abstract class FlushPolicy implements Cl
   /**
    * Called by DocumentsWriter to initialize the FlushPolicy
    */
-  protected synchronized void init(DocumentsWriter docsWriter) {
-    writer.set(docsWriter);
-    indexWriterConfig = docsWriter.indexWriter.getConfig();
+  protected synchronized void init(LiveIndexWriterConfig indexWriterConfig) {
+    this.indexWriterConfig = indexWriterConfig;
+    infoStream = indexWriterConfig.getInfoStream();
   }
 
   /**
@@ -127,8 +128,8 @@ abstract class FlushPolicy implements Cl
   }
   
   private boolean assertMessage(String s) {
-    if (writer.get().infoStream.isEnabled("FP")) {
-      writer.get().infoStream.message("FP", s);
+    if (infoStream.isEnabled("FP")) {
+      infoStream.message("FP", s);
     }
     return true;
   }
@@ -142,8 +143,8 @@ abstract class FlushPolicy implements Cl
       // should not happen
       throw new RuntimeException(e);
     }
-    clone.writer = new SetOnce<DocumentsWriter>();
     clone.indexWriterConfig = null;
+    clone.infoStream = null; 
     return clone;
   }
 }

Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
(original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
Mon Oct 21 18:58:24 2013
@@ -19,19 +19,64 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CollectionUtil;
-import org.apache.lucene.util.IOUtils;
 
 final class FreqProxTermsWriter extends TermsHashConsumer {
 
   @Override
   void abort() {}
 
+  private void applyDeletes(SegmentWriteState state, Fields fields) throws IOException {
+    // Process any pending Term deletes for this newly
+    // flushed segment:
+    if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
+      Map<Term,Integer> segDeletes = state.segDeletes.terms;
+      List<Term> deleteTerms = new ArrayList<Term>(segDeletes.keySet());
+      Collections.sort(deleteTerms);
+      String lastField = null;
+      TermsEnum termsEnum = null;
+      DocsEnum docsEnum = null;
+      for(Term deleteTerm : deleteTerms) {
+        if (deleteTerm.field().equals(lastField) == false) {
+          lastField = deleteTerm.field();
+          Terms terms = fields.terms(lastField);
+          if (terms != null) {
+            termsEnum = terms.iterator(termsEnum);
+          } else {
+            termsEnum = null;
+          }
+        }
+
+        if (termsEnum != null && termsEnum.seekExact(deleteTerm.bytes())) {
+          docsEnum = termsEnum.docs(null, docsEnum, 0);
+          int delDocLimit = segDeletes.get(deleteTerm);
+          while (true) {
+            int doc = docsEnum.nextDoc();
+            if (doc == DocsEnum.NO_MORE_DOCS) {
+              break;
+            }
+            if (doc < delDocLimit) {
+              if (state.liveDocs == null) {
+                state.liveDocs = state.segmentInfo.getCodec().liveDocsFormat().newLiveDocs(state.segmentInfo.getDocCount());
+              }
+              if (state.liveDocs.get(doc)) {
+                state.delCountOnFlush++;
+                state.liveDocs.clear(doc);
+              }
+            } else {
+              break;
+            }
+          }
+        }
+      }
+    }
+  }
+
   // TODO: would be nice to factor out more of this, eg the
   // FreqProxFieldMergeState, and code to visit all Fields
   // under the same FieldInfo together, up into TermsHash*.
@@ -47,63 +92,20 @@ final class FreqProxTermsWriter extends 
     for (TermsHashConsumerPerField f : fieldsToFlush.values()) {
       final FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField) f;
       if (perField.termsHashPerField.bytesHash.size() > 0) {
+        perField.sortPostings();
+        assert perField.fieldInfo.isIndexed();
         allFields.add(perField);
       }
     }
 
-    final int numAllFields = allFields.size();
-
     // Sort by field name
     CollectionUtil.introSort(allFields);
 
-    final FieldsConsumer consumer = state.segmentInfo.getCodec().postingsFormat().fieldsConsumer(state);
+    Fields fields = new FreqProxFields(allFields);
 
-    boolean success = false;
+    applyDeletes(state, fields);
 
-    try {
-      TermsHash termsHash = null;
-      
-      /*
-    Current writer chain:
-      FieldsConsumer
-        -> IMPL: FormatPostingsTermsDictWriter
-          -> TermsConsumer
-            -> IMPL: FormatPostingsTermsDictWriter.TermsWriter
-              -> DocsConsumer
-                -> IMPL: FormatPostingsDocsWriter
-                  -> PositionsConsumer
-                    -> IMPL: FormatPostingsPositionsWriter
-       */
-      
-      for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++) {
-        final FieldInfo fieldInfo = allFields.get(fieldNumber).fieldInfo;
-        
-        final FreqProxTermsWriterPerField fieldWriter = allFields.get(fieldNumber);
-
-        // If this field has postings then add them to the
-        // segment
-        fieldWriter.flush(fieldInfo.name, consumer, state);
-        
-        TermsHashPerField perField = fieldWriter.termsHashPerField;
-        assert termsHash == null || termsHash == perField.termsHash;
-        termsHash = perField.termsHash;
-        int numPostings = perField.bytesHash.size();
-        perField.reset();
-        perField.shrinkHash(numPostings);
-        fieldWriter.reset();
-      }
-      
-      if (termsHash != null) {
-        termsHash.reset();
-      }
-      success = true;
-    } finally {
-      if (success) {
-        IOUtils.close(consumer);
-      } else {
-        IOUtils.closeWhileHandlingException(consumer);
-      }
-    }
+    state.segmentInfo.getCodec().postingsFormat().fieldsConsumer(state).write(fields);
   }
 
   BytesRef payload;

Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
(original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
Mon Oct 21 18:58:24 2013
@@ -17,19 +17,10 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
-import java.io.IOException;
-import java.util.Comparator;
-import java.util.Map;
-
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.codecs.FieldsConsumer;
-import org.apache.lucene.codecs.PostingsConsumer;
-import org.apache.lucene.codecs.TermStats;
-import org.apache.lucene.codecs.TermsConsumer;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.RamUsageEstimator;
 
 // TODO: break into separate freq and prox writers as
@@ -42,11 +33,16 @@ final class FreqProxTermsWriterPerField 
   final FieldInfo fieldInfo;
   final DocumentsWriterPerThread.DocState docState;
   final FieldInvertState fieldState;
-  private boolean hasFreq;
-  private boolean hasProx;
-  private boolean hasOffsets;
+  boolean hasFreq;
+  boolean hasProx;
+  boolean hasOffsets;
   PayloadAttribute payloadAttribute;
   OffsetAttribute offsetAttribute;
+  long sumTotalTermFreq;
+  long sumDocFreq;
+
+  // How many docs have this field:
+  int docCount;
 
   public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriter
parent, FieldInfo fieldInfo) {
     this.termsHashPerField = termsHashPerField;
@@ -68,6 +64,12 @@ final class FreqProxTermsWriterPerField 
 
   @Override
   void finish() {
+    sumDocFreq += fieldState.uniqueTermCount;
+    sumTotalTermFreq += fieldState.length;
+    if (fieldState.length > 0) {
+      docCount++;
+    }
+
     if (hasPayloads) {
       fieldInfo.setStorePayloads();
     }
@@ -83,14 +85,6 @@ final class FreqProxTermsWriterPerField 
     return fieldInfo.name.compareTo(other.fieldInfo.name);
   }
 
-  // Called after flush
-  void reset() {
-    // Record, up front, whether our in-RAM format will be
-    // with or without term freqs:
-    setIndexOptions(fieldInfo.getIndexOptions());
-    payloadAttribute = null;
-  }
-
   private void setIndexOptions(IndexOptions indexOptions) {
     if (indexOptions == null) {
       // field could later be updated with indexed=true, so set everything on
@@ -318,233 +312,10 @@ final class FreqProxTermsWriterPerField 
 
   BytesRef payload;
 
-  /* Walk through all unique text tokens (Posting
-   * instances) found in this field and serialize them
-   * into a single RAM segment. */
-  void flush(String fieldName, FieldsConsumer consumer,  final SegmentWriteState state)
-    throws IOException {
-
-    if (!fieldInfo.isIndexed()) {
-      return; // nothing to flush, don't bother the codec with the unindexed field
-    }
-    
-    final TermsConsumer termsConsumer = consumer.addField(fieldInfo);
-    final Comparator<BytesRef> termComp = termsConsumer.getComparator();
-
-    // CONFUSING: this.indexOptions holds the index options
-    // that were current when we first saw this field.  But
-    // it's possible this has changed, eg when other
-    // documents are indexed that cause a "downgrade" of the
-    // IndexOptions.  So we must decode the in-RAM buffer
-    // according to this.indexOptions, but then write the
-    // new segment to the directory according to
-    // currentFieldIndexOptions:
-    final IndexOptions currentFieldIndexOptions = fieldInfo.getIndexOptions();
-    assert currentFieldIndexOptions != null;
-
-    final boolean writeTermFreq = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS)
>= 0;
-    final boolean writePositions = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
>= 0;
-    final boolean writeOffsets = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
>= 0;
-
-    final boolean readTermFreq = this.hasFreq;
-    final boolean readPositions = this.hasProx;
-    final boolean readOffsets = this.hasOffsets;
-
-    //System.out.println("flush readTF=" + readTermFreq + " readPos=" + readPositions + "
readOffs=" + readOffsets);
-
-    // Make sure FieldInfo.update is working correctly!:
-    assert !writeTermFreq || readTermFreq;
-    assert !writePositions || readPositions;
-    assert !writeOffsets || readOffsets;
-
-    assert !writeOffsets || writePositions;
-
-    final Map<Term,Integer> segDeletes;
-    if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
-      segDeletes = state.segDeletes.terms;
-    } else {
-      segDeletes = null;
-    }
-
-    final int[] termIDs = termsHashPerField.sortPostings(termComp);
-    final int numTerms = termsHashPerField.bytesHash.size();
-    final BytesRef text = new BytesRef();
-    final FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
-    final ByteSliceReader freq = new ByteSliceReader();
-    final ByteSliceReader prox = new ByteSliceReader();
-
-    FixedBitSet visitedDocs = new FixedBitSet(state.segmentInfo.getDocCount());
-    long sumTotalTermFreq = 0;
-    long sumDocFreq = 0;
-
-    Term protoTerm = new Term(fieldName);
-    for (int i = 0; i < numTerms; i++) {
-      final int termID = termIDs[i];
-      //System.out.println("term=" + termID);
-      // Get BytesRef
-      final int textStart = postings.textStarts[termID];
-      termsHashPerField.bytePool.setBytesRef(text, textStart);
-
-      termsHashPerField.initReader(freq, termID, 0);
-      if (readPositions || readOffsets) {
-        termsHashPerField.initReader(prox, termID, 1);
-      }
-
-      // TODO: really TermsHashPerField should take over most
-      // of this loop, including merge sort of terms from
-      // multiple threads and interacting with the
-      // TermsConsumer, only calling out to us (passing us the
-      // DocsConsumer) to handle delivery of docs/positions
-
-      final PostingsConsumer postingsConsumer = termsConsumer.startTerm(text);
-
-      final int delDocLimit;
-      if (segDeletes != null) {
-        protoTerm.bytes = text;
-        final Integer docIDUpto = segDeletes.get(protoTerm);
-        if (docIDUpto != null) {
-          delDocLimit = docIDUpto;
-        } else {
-          delDocLimit = 0;
-        }
-      } else {
-        delDocLimit = 0;
-      }
-
-      // Now termStates has numToMerge FieldMergeStates
-      // which all share the same term.  Now we must
-      // interleave the docID streams.
-      int docFreq = 0;
-      long totalTermFreq = 0;
-      int docID = 0;
-
-      while(true) {
-        //System.out.println("  cycle");
-        final int termFreq;
-        if (freq.eof()) {
-          if (postings.lastDocCodes[termID] != -1) {
-            // Return last doc
-            docID = postings.lastDocIDs[termID];
-            if (readTermFreq) {
-              termFreq = postings.termFreqs[termID];
-            } else {
-              termFreq = -1;
-            }
-            postings.lastDocCodes[termID] = -1;
-          } else {
-            // EOF
-            break;
-          }
-        } else {
-          final int code = freq.readVInt();
-          if (!readTermFreq) {
-            docID += code;
-            termFreq = -1;
-          } else {
-            docID += code >>> 1;
-            if ((code & 1) != 0) {
-              termFreq = 1;
-            } else {
-              termFreq = freq.readVInt();
-            }
-          }
-
-          assert docID != postings.lastDocIDs[termID];
-        }
-
-        docFreq++;
-        assert docID < state.segmentInfo.getDocCount(): "doc=" + docID + " maxDoc=" +
state.segmentInfo.getDocCount();
-
-        // NOTE: we could check here if the docID was
-        // deleted, and skip it.  However, this is somewhat
-        // dangerous because it can yield non-deterministic
-        // behavior since we may see the docID before we see
-        // the term that caused it to be deleted.  This
-        // would mean some (but not all) of its postings may
-        // make it into the index, which'd alter the docFreq
-        // for those terms.  We could fix this by doing two
-        // passes, ie first sweep marks all del docs, and
-        // 2nd sweep does the real flush, but I suspect
-        // that'd add too much time to flush.
-        visitedDocs.set(docID);
-        postingsConsumer.startDoc(docID, writeTermFreq ? termFreq : -1);
-        if (docID < delDocLimit) {
-          // Mark it deleted.  TODO: we could also skip
-          // writing its postings; this would be
-          // deterministic (just for this Term's docs).
-          
-          // TODO: can we do this reach-around in a cleaner way????
-          if (state.liveDocs == null) {
-            state.liveDocs = docState.docWriter.codec.liveDocsFormat().newLiveDocs(state.segmentInfo.getDocCount());
-          }
-          if (state.liveDocs.get(docID)) {
-            state.delCountOnFlush++;
-            state.liveDocs.clear(docID);
-          }
-        }
-
-        totalTermFreq += termFreq;
-        
-        // Carefully copy over the prox + payload info,
-        // changing the format to match Lucene's segment
-        // format.
-
-        if (readPositions || readOffsets) {
-          // we did record positions (& maybe payload) and/or offsets
-          int position = 0;
-          int offset = 0;
-          for(int j=0;j<termFreq;j++) {
-            final BytesRef thisPayload;
-
-            if (readPositions) {
-              final int code = prox.readVInt();
-              position += code >>> 1;
-
-              if ((code & 1) != 0) {
-
-                // This position has a payload
-                final int payloadLength = prox.readVInt();
-
-                if (payload == null) {
-                  payload = new BytesRef();
-                  payload.bytes = new byte[payloadLength];
-                } else if (payload.bytes.length < payloadLength) {
-                  payload.grow(payloadLength);
-                }
-
-                prox.readBytes(payload.bytes, 0, payloadLength);
-                payload.length = payloadLength;
-                thisPayload = payload;
-
-              } else {
-                thisPayload = null;
-              }
-
-              if (readOffsets) {
-                final int startOffset = offset + prox.readVInt();
-                final int endOffset = startOffset + prox.readVInt();
-                if (writePositions) {
-                  if (writeOffsets) {
-                    assert startOffset >=0 && endOffset >= startOffset : "startOffset="
+ startOffset + ",endOffset=" + endOffset + ",offset=" + offset;
-                    postingsConsumer.addPosition(position, thisPayload, startOffset, endOffset);
-                  } else {
-                    postingsConsumer.addPosition(position, thisPayload, -1, -1);
-                  }
-                }
-                offset = startOffset;
-              } else if (writePositions) {
-                postingsConsumer.addPosition(position, thisPayload, -1, -1);
-              }
-            }
-          }
-        }
-        postingsConsumer.finishDoc();
-      }
-      termsConsumer.finishTerm(text, new TermStats(docFreq, writeTermFreq ? totalTermFreq
: -1));
-      sumTotalTermFreq += totalTermFreq;
-      sumDocFreq += docFreq;
-    }
+  int[] sortedTermIDs;
 
-    termsConsumer.finish(writeTermFreq ? sumTotalTermFreq : -1, sumDocFreq, visitedDocs.cardinality());
+  void sortPostings() {
+    assert sortedTermIDs == null;
+    sortedTermIDs = termsHashPerField.sortPostings();
   }
 }

Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java
(original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java
Mon Oct 21 18:58:24 2013
@@ -17,32 +17,39 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
+import java.util.ArrayList;
 import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
 import java.util.Map;
 
+import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit;
-
-/** Holds buffered deletes by term or query, once pushed.
- *  Pushed deletes are write-once, so we shift to more
- *  memory efficient data structure to hold them.  We don't
- *  hold docIDs because these are applied on flush. */
 
-class FrozenBufferedDeletes {
+/**
+ * Holds buffered deletes and updates by term or query, once pushed. Pushed
+ * deletes/updates are write-once, so we shift to more memory efficient data
+ * structure to hold them. We don't hold docIDs because these are applied on
+ * flush.
+ */
+class FrozenBufferedDeletes { // TODO (DVU_RENAME) FrozenBufferedUpdates?
 
   /* Query we often undercount (say 24 bytes), plus int. */
   final static int BYTES_PER_DEL_QUERY = RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_INT
+ 24;
-
+  
   // Terms, in sorted order:
   final PrefixCodedTerms terms;
   int termCount; // just for debugging
 
-  // Parallel array of deleted query, and the docIDUpto for
-  // each
+  // Parallel array of deleted query, and the docIDUpto for each
   final Query[] queries;
   final int[] queryLimits;
+  
+  // numeric DV update term and their updates
+  final NumericUpdate[] updates;
+  
   final int bytesUsed;
   final int numTermDeletes;
   private long gen = -1; // assigned by BufferedDeletesStream once pushed
@@ -72,7 +79,21 @@ class FrozenBufferedDeletes {
       upto++;
     }
 
-    bytesUsed = (int) terms.getSizeInBytes() + queries.length * BYTES_PER_DEL_QUERY;
+    // TODO if a Term affects multiple fields, we could keep the updates key'd by Term
+    // so that it maps to all fields it affects, sorted by their docUpto, and traverse
+    // that Term only once, applying the update to all fields that still need to be
+    // updated. 
+    List<NumericUpdate> allUpdates = new ArrayList<NumericUpdate>();
+    int numericUpdatesSize = 0;
+    for (LinkedHashMap<Term,NumericUpdate> fieldUpdates : deletes.numericUpdates.values())
{
+      for (NumericUpdate update : fieldUpdates.values()) {
+        allUpdates.add(update);
+        numericUpdatesSize += update.sizeInBytes();
+      }
+    }
+    updates = allUpdates.toArray(new NumericUpdate[allUpdates.size()]);
+    
+    bytesUsed = (int) terms.getSizeInBytes() + queries.length * BYTES_PER_DEL_QUERY + numericUpdatesSize
+ updates.length * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
     numTermDeletes = deletes.numTermDeletes.get();
   }
   
@@ -140,6 +161,6 @@ class FrozenBufferedDeletes {
   }
   
   boolean any() {
-    return termCount > 0 || queries.length > 0;
+    return termCount > 0 || queries.length > 0 || updates.length > 0;
   }
 }

Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
(original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
Mon Oct 21 18:58:24 2013
@@ -92,7 +92,7 @@ final class IndexFileDeleter implements 
 
   /* Holds files we had incref'd from the previous
    * non-commit checkpoint: */
-  private List<Collection<String>> lastFiles = new ArrayList<Collection<String>>();
+  private final List<String> lastFiles = new ArrayList<String>();
 
   /* Commits that the IndexDeletionPolicy have decided to delete: */
   private List<CommitPoint> commitsToDelete = new ArrayList<CommitPoint>();
@@ -361,14 +361,13 @@ final class IndexFileDeleter implements 
     refresh(null);
   }
 
+  @Override
   public void close() throws IOException {
     // DecRef old files from the last checkpoint, if any:
     assert locked();
-    int size = lastFiles.size();
-    if (size > 0) {
-      for(int i=0;i<size;i++) {
-        decRef(lastFiles.get(i));
-      }
+
+    if (!lastFiles.isEmpty()) {
+      decRef(lastFiles);
       lastFiles.clear();
     }
 
@@ -459,13 +458,11 @@ final class IndexFileDeleter implements 
       deleteCommits();
     } else {
       // DecRef old files from the last checkpoint, if any:
-      for (Collection<String> lastFile : lastFiles) {
-        decRef(lastFile);
-      }
+      decRef(lastFiles);
       lastFiles.clear();
 
       // Save files so we can decr on next checkpoint/commit:
-      lastFiles.add(segmentInfos.files(directory, false));
+      lastFiles.addAll(segmentInfos.files(directory, false));
     }
     if (infoStream.isEnabled("IFD")) {
       long t1 = System.nanoTime();

Modified: lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/IndexUpgrader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/IndexUpgrader.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/IndexUpgrader.java
(original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/java/org/apache/lucene/index/IndexUpgrader.java
Mon Oct 21 18:58:24 2013
@@ -71,6 +71,9 @@ public final class IndexUpgrader {
    *  command-line. */
   @SuppressWarnings("deprecation")
   public static void main(String[] args) throws IOException {
+    parseArgs(args).upgrade();
+  }
+  static IndexUpgrader parseArgs(String[] args) throws IOException {
     String path = null;
     boolean deletePriorCommits = false;
     PrintStream out = null;
@@ -82,8 +85,6 @@ public final class IndexUpgrader {
         deletePriorCommits = true;
       } else if ("-verbose".equals(arg)) {
         out = System.out;
-      } else if (path == null) {
-        path = arg;
       } else if ("-dir-impl".equals(arg)) {
         if (i == args.length - 1) {
           System.out.println("ERROR: missing value for -dir-impl option");
@@ -91,6 +92,8 @@ public final class IndexUpgrader {
         }
         i++;
         dirImpl = args[i];
+      } else if (path == null) {
+        path = arg;
       }else {
         printUsage();
       }
@@ -106,7 +109,7 @@ public final class IndexUpgrader {
     } else {
       dir = CommandLineUtil.newFSDirectory(dirImpl, new File(path));
     }
-    new IndexUpgrader(dir, Version.LUCENE_CURRENT, out, deletePriorCommits).upgrade();
+    return new IndexUpgrader(dir, Version.LUCENE_CURRENT, out, deletePriorCommits);
   }
   
   private final Directory dir;
@@ -123,7 +126,10 @@ public final class IndexUpgrader {
    * {@code matchVersion}. You have the possibility to upgrade indexes with multiple commit
points by removing
    * all older ones. If {@code infoStream} is not {@code null}, all logging output will be
sent to this stream. */
   public IndexUpgrader(Directory dir, Version matchVersion, PrintStream infoStream, boolean
deletePriorCommits) {
-    this(dir, new IndexWriterConfig(matchVersion, null).setInfoStream(infoStream), deletePriorCommits);
+    this(dir, new IndexWriterConfig(matchVersion, null), deletePriorCommits);
+    if (null != infoStream) {
+      this.iwc.setInfoStream(infoStream);
+    }
   }
   
   /** Creates index upgrader on the given directory, using an {@link IndexWriter} using the
given



Mime
View raw message