lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jpou...@apache.org
Subject lucene-solr:master: LUCENE-7474: Doc values writers should have a sparse encoding.
Date Tue, 04 Oct 2016 17:06:48 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/master 796ed508f -> d50cf9761


LUCENE-7474: Doc values writers should have a sparse encoding.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/d50cf976
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/d50cf976
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/d50cf976

Branch: refs/heads/master
Commit: d50cf97617c88ec75fd8f4482003623db08e625e
Parents: 796ed50
Author: Adrien Grand <jpountz@gmail.com>
Authored: Tue Oct 4 18:59:26 2016 +0200
Committer: Adrien Grand <jpountz@gmail.com>
Committed: Tue Oct 4 19:00:53 2016 +0200

----------------------------------------------------------------------
 .../lucene/index/BinaryDocValuesWriter.java     | 76 +++++++------------
 .../lucene/index/NumericDocValuesWriter.java    | 66 +++++-----------
 .../lucene/index/SortedDocValuesWriter.java     | 63 ++++++---------
 .../index/SortedNumericDocValuesWriter.java     | 80 +++++++++-----------
 .../lucene/index/SortedSetDocValuesWriter.java  | 74 +++++++-----------
 5 files changed, 134 insertions(+), 225 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d50cf976/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
index cbcfec5..ff2e67c 100644
--- a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
@@ -20,15 +20,16 @@ package org.apache.lucene.index;
 import java.io.IOException;
 
 import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BitSetIterator;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.Counter;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.PagedBytes;
-import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.packed.PackedInts;
 import org.apache.lucene.util.packed.PackedLongValues;
 
@@ -49,8 +50,9 @@ class BinaryDocValuesWriter extends DocValuesWriter {
   private final PackedLongValues.Builder lengths;
   private FixedBitSet docsWithField;
   private final FieldInfo fieldInfo;
-  private int addedValues;
   private long bytesUsed;
+  private int lastDocID = -1;
+  private int maxLength = 0;
 
   public BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
     this.fieldInfo = fieldInfo;
@@ -59,12 +61,12 @@ class BinaryDocValuesWriter extends DocValuesWriter {
     this.lengths = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
     this.iwBytesUsed = iwBytesUsed;
     this.docsWithField = new FixedBitSet(64);
-    this.bytesUsed = docsWithFieldBytesUsed();
+    this.bytesUsed = lengths.ramBytesUsed() + docsWithField.ramBytesUsed();
     iwBytesUsed.addAndGet(bytesUsed);
   }
 
   public void addValue(int docID, BytesRef value) {
-    if (docID < addedValues) {
+    if (docID <= lastDocID) {
       throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" appears more than once in this document (only one value is allowed per field)");
     }
     if (value == null) {
@@ -74,12 +76,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
       throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" is too large, must be <= " + MAX_LENGTH);
     }
 
-    // Fill in any holes:
-    while(addedValues < docID) {
-      addedValues++;
-      lengths.add(0);
-    }
-    addedValues++;
+    maxLength = Math.max(value.length, maxLength);
     lengths.add(value.length);
     try {
       bytesOut.writeBytes(value.bytes, value.offset, value.length);
@@ -90,15 +87,12 @@ class BinaryDocValuesWriter extends DocValuesWriter {
     docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
     docsWithField.set(docID);
     updateBytesUsed();
-  }
-  
-  private long docsWithFieldBytesUsed() {
-    // size of the long[] + some overhead
-    return RamUsageEstimator.sizeOf(docsWithField.getBits()) + 64;
+
+    lastDocID = docID;
   }
 
   private void updateBytesUsed() {
-    final long newBytesUsed = lengths.ramBytesUsed() + bytes.ramBytesUsed() + docsWithFieldBytesUsed();
+    final long newBytesUsed = lengths.ramBytesUsed() + bytes.ramBytesUsed() + docsWithField.ramBytesUsed();
     iwBytesUsed.addAndGet(newBytesUsed - bytesUsed);
     bytesUsed = newBytesUsed;
   }
@@ -109,7 +103,6 @@ class BinaryDocValuesWriter extends DocValuesWriter {
 
   @Override
   public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
-    final int maxDoc = state.segmentInfo.maxDoc();
     bytes.freeze(false);
     final PackedLongValues lengths = this.lengths.build();
     dvConsumer.addBinaryField(fieldInfo,
@@ -119,53 +112,38 @@ class BinaryDocValuesWriter extends DocValuesWriter {
                                   if (fieldInfoIn != fieldInfo) {
                                     throw new IllegalArgumentException("wrong fieldInfo");
                                   }
-                                  return new BufferedBinaryDocValues(maxDoc, lengths);
+                                  return new BufferedBinaryDocValues(lengths, maxLength, bytes.getDataInput(), docsWithField);
                                 }
                               });
   }
 
   // iterates over the values we have in ram
-  private class BufferedBinaryDocValues extends BinaryDocValues {
-    final BytesRefBuilder value = new BytesRefBuilder();
+  private static class BufferedBinaryDocValues extends BinaryDocValues {
+    final BytesRefBuilder value;
     final PackedLongValues.Iterator lengthsIterator;
-    final DataInput bytesIterator = bytes.getDataInput();
-    final int size;
-    final int maxDoc;
-    private int docID = -1;
+    final DocIdSetIterator docsWithField;
+    final DataInput bytesIterator;
     
-    BufferedBinaryDocValues(int maxDoc, PackedLongValues lengths) {
-      this.maxDoc = maxDoc;
+    BufferedBinaryDocValues(PackedLongValues lengths, int maxLength, DataInput bytesIterator, FixedBitSet docsWithFields) {
+      this.value = new BytesRefBuilder();
+      this.value.grow(maxLength);
       this.lengthsIterator = lengths.iterator();
-      this.size = (int) lengths.size();
+      this.bytesIterator = bytesIterator;
+      this.docsWithField = new BitSetIterator(docsWithFields, lengths.size());
     }
 
     @Override
     public int docID() {
-      return docID;
+      return docsWithField.docID();
     }
 
     @Override
     public int nextDoc() throws IOException {
-      if (docID == size-1) {
-        docID = NO_MORE_DOCS;
-      } else {
-        int next = docsWithField.nextSetBit(docID+1);
-        if (next == NO_MORE_DOCS) {
-          docID = NO_MORE_DOCS;
-        } else {
-
-          int length = 0;
-
-          // skip missing values:
-          while (docID < next) {
-            docID++;
-            length = (int) lengthsIterator.next();
-            assert docID == next || length == 0;
-          }
-          value.grow(length);
-          value.setLength(length);
-          bytesIterator.readBytes(value.bytes(), 0, length);
-        }
+      int docID = docsWithField.nextDoc();
+      if (docID != NO_MORE_DOCS) {
+        int length = Math.toIntExact(lengthsIterator.next());
+        value.setLength(length);
+        bytesIterator.readBytes(value.bytes(), 0, length);
       }
       return docID;
     }
@@ -177,7 +155,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
 
     @Override
     public long cost() {
-      return docsWithField.cardinality();
+      return docsWithField.cost();
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d50cf976/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
index 03fbe10..adfa706 100644
--- a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
@@ -20,9 +20,10 @@ package org.apache.lucene.index;
 import java.io.IOException;
 
 import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.BitSetIterator;
 import org.apache.lucene.util.Counter;
 import org.apache.lucene.util.FixedBitSet;
-import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.packed.PackedInts;
 import org.apache.lucene.util.packed.PackedLongValues;
 
@@ -30,47 +31,38 @@ import org.apache.lucene.util.packed.PackedLongValues;
  *  segment flushes. */
 class NumericDocValuesWriter extends DocValuesWriter {
 
-  private final static long MISSING = 0L;
-
   private PackedLongValues.Builder pending;
   private final Counter iwBytesUsed;
   private long bytesUsed;
   private FixedBitSet docsWithField;
   private final FieldInfo fieldInfo;
+  private int lastDocID = -1;
 
   public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
     pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
     docsWithField = new FixedBitSet(64);
-    bytesUsed = pending.ramBytesUsed() + docsWithFieldBytesUsed();
+    bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
     this.fieldInfo = fieldInfo;
     this.iwBytesUsed = iwBytesUsed;
     iwBytesUsed.addAndGet(bytesUsed);
   }
 
   public void addValue(int docID, long value) {
-    if (docID < pending.size()) {
+    if (docID <= lastDocID) {
       throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" appears more than once in this document (only one value is allowed per field)");
     }
 
-    // Fill in any holes:
-    for (int i = (int)pending.size(); i < docID; ++i) {
-      pending.add(MISSING);
-    }
-
     pending.add(value);
     docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
     docsWithField.set(docID);
-    
+
     updateBytesUsed();
-  }
-  
-  private long docsWithFieldBytesUsed() {
-    // size of the long[] + some overhead
-    return RamUsageEstimator.sizeOf(docsWithField.getBits()) + 64;
+
+    lastDocID = docID;
   }
 
   private void updateBytesUsed() {
-    final long newBytesUsed = pending.ramBytesUsed() + docsWithFieldBytesUsed();
+    final long newBytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
     iwBytesUsed.addAndGet(newBytesUsed - bytesUsed);
     bytesUsed = newBytesUsed;
   }
@@ -82,7 +74,6 @@ class NumericDocValuesWriter extends DocValuesWriter {
   @Override
   public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
 
-    final int maxDoc = state.segmentInfo.maxDoc();
     final PackedLongValues values = pending.build();
 
     dvConsumer.addNumericField(fieldInfo,
@@ -92,7 +83,7 @@ class NumericDocValuesWriter extends DocValuesWriter {
                                    if (fieldInfo != NumericDocValuesWriter.this.fieldInfo) {
                                      throw new IllegalArgumentException("wrong fieldInfo");
                                    }
-                                   return new BufferedNumericDocValues(maxDoc, values, docsWithField);
+                                   return new BufferedNumericDocValues(values, docsWithField);
                                  }
                                });
   }
@@ -100,43 +91,28 @@ class NumericDocValuesWriter extends DocValuesWriter {
   // iterates over the values we have in ram
   private static class BufferedNumericDocValues extends NumericDocValues {
     final PackedLongValues.Iterator iter;
-    final FixedBitSet docsWithField;
-    final int size;
-    final int maxDoc;
+    final DocIdSetIterator docsWithField;
     private long value;
-    private int docID = -1;
-    
-    BufferedNumericDocValues(int maxDoc, PackedLongValues values, FixedBitSet docsWithFields) {
-      this.maxDoc = maxDoc;
+
+    BufferedNumericDocValues(PackedLongValues values, FixedBitSet docsWithFields) {
       this.iter = values.iterator();
-      this.size = (int) values.size();
-      this.docsWithField = docsWithFields;
+      this.docsWithField = new BitSetIterator(docsWithFields, values.size());
     }
 
     @Override
     public int docID() {
-      return docID;
+      return docsWithField.docID();
     }
 
     @Override
-    public int nextDoc() {
-      if (docID == size-1) {
-        docID = NO_MORE_DOCS;
-      } else {
-        int next = docsWithField.nextSetBit(docID+1);
-        if (next == NO_MORE_DOCS) {
-          docID = NO_MORE_DOCS;
-        } else {
-          // skip missing values:
-          while (docID < next) {
-            docID++;
-            value = iter.next();
-          }
-        }
+    public int nextDoc() throws IOException {
+      int docID = docsWithField.nextDoc();
+      if (docID != NO_MORE_DOCS) {
+        value = iter.next();
       }
       return docID;
     }
-    
+
     @Override
     public int advance(int target) {
       throw new UnsupportedOperationException();
@@ -144,7 +120,7 @@ class NumericDocValuesWriter extends DocValuesWriter {
 
     @Override
     public long cost() {
-      return docsWithField.cardinality();
+      return docsWithField.cost();
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d50cf976/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java
index f199fcc..885ee89 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java
@@ -21,11 +21,14 @@ import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
 import java.io.IOException;
 
 import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.BitSetIterator;
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
 import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.packed.PackedInts;
 import org.apache.lucene.util.packed.PackedLongValues;
 
@@ -34,12 +37,11 @@ import org.apache.lucene.util.packed.PackedLongValues;
 class SortedDocValuesWriter extends DocValuesWriter {
   final BytesRefHash hash;
   private PackedLongValues.Builder pending;
+  private FixedBitSet docsWithField;
   private final Counter iwBytesUsed;
   private long bytesUsed; // this currently only tracks differences in 'pending'
   private final FieldInfo fieldInfo;
-
-  private static final int EMPTY_ORD = -1;
-  private int nonEmptyCount;
+  private int lastDocID = -1;
 
   public SortedDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
     this.fieldInfo = fieldInfo;
@@ -50,12 +52,13 @@ class SortedDocValuesWriter extends DocValuesWriter {
             BytesRefHash.DEFAULT_CAPACITY,
             new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
     pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
-    bytesUsed = pending.ramBytesUsed();
+    docsWithField = new FixedBitSet(64);
+    bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
     iwBytesUsed.addAndGet(bytesUsed);
   }
 
   public void addValue(int docID, BytesRef value) {
-    if (docID < pending.size()) {
+    if (docID <= lastDocID) {
       throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" appears more than once in this document (only one value is allowed per field)");
     }
     if (value == null) {
@@ -65,20 +68,15 @@ class SortedDocValuesWriter extends DocValuesWriter {
       throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" is too large, must be <= " + (BYTE_BLOCK_SIZE - 2));
     }
 
-    // Fill in any holes:
-    while(pending.size() < docID) {
-      pending.add(EMPTY_ORD);
-    }
-
     addOneValue(value);
-    nonEmptyCount++;
+    docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
+    docsWithField.set(docID);
+
+    lastDocID = docID;
   }
 
   @Override
   public void finish(int maxDoc) {
-    while(pending.size() < maxDoc) {
-      pending.add(EMPTY_ORD);
-    }
     updateBytesUsed();
   }
 
@@ -99,16 +97,13 @@ class SortedDocValuesWriter extends DocValuesWriter {
   }
   
   private void updateBytesUsed() {
-    final long newBytesUsed = pending.ramBytesUsed();
+    final long newBytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
     iwBytesUsed.addAndGet(newBytesUsed - bytesUsed);
     bytesUsed = newBytesUsed;
   }
 
   @Override
   public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
-    final int maxDoc = state.segmentInfo.maxDoc();
-
-    assert pending.size() == maxDoc;
     final int valueCount = hash.size();
     final PackedLongValues ords = pending.build();
 
@@ -126,51 +121,41 @@ class SortedDocValuesWriter extends DocValuesWriter {
                                   if (fieldInfoIn != fieldInfo) {
                                     throw new IllegalArgumentException("wrong fieldInfo");
                                   }
-                                  return new BufferedSortedDocValues(hash, valueCount, maxDoc, ords, sortedValues, ordMap, nonEmptyCount);
+                                  return new BufferedSortedDocValues(hash, valueCount, ords, sortedValues, ordMap, docsWithField);
                                 }
                               });
   }
 
   private static class BufferedSortedDocValues extends SortedDocValues {
     final BytesRefHash hash;
-    final int maxDoc;
     final BytesRef scratch = new BytesRef();
     final int[] sortedValues;
     final int[] ordMap;
-    final int cost;
     final int valueCount;
-    private int docID = -1;
     private int ord;
     final PackedLongValues.Iterator iter;
+    final DocIdSetIterator docsWithField;
 
-    public BufferedSortedDocValues(BytesRefHash hash, int valueCount, int maxDoc, PackedLongValues docToOrd, int[] sortedValues, int[] ordMap, int cost) {
+    public BufferedSortedDocValues(BytesRefHash hash, int valueCount, PackedLongValues docToOrd, int[] sortedValues, int[] ordMap, FixedBitSet docsWithField) {
       this.hash = hash;
       this.valueCount = valueCount;
-      this.maxDoc = maxDoc;
       this.sortedValues = sortedValues;
       this.iter = docToOrd.iterator();
       this.ordMap = ordMap;
-      this.cost = cost;
+      this.docsWithField = new BitSetIterator(docsWithField, docToOrd.size());
     }
 
     @Override
     public int docID() {
-      return docID;
+      return docsWithField.docID();
     }
 
     @Override
-    public int nextDoc() {
-      while (true) {
-        docID++;
-        if (docID >= maxDoc) {
-          docID = NO_MORE_DOCS;
-          break;
-        }
-        ord = (int) iter.next();
-        if (ord != -1) {
-          ord = ordMap[ord];
-          break;
-        }
+    public int nextDoc() throws IOException {
+      int docID = docsWithField.nextDoc();
+      if (docID != NO_MORE_DOCS) {
+        ord = Math.toIntExact(iter.next());
+        ord = ordMap[ord];
       }
       return docID;
     }
@@ -182,7 +167,7 @@ class SortedDocValuesWriter extends DocValuesWriter {
 
     @Override
     public long cost() {
-      return cost;
+      return docsWithField.cost();
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d50cf976/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java
index 7db5d98..e154547 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java
@@ -21,8 +21,11 @@ import java.io.IOException;
 import java.util.Arrays;
 
 import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BitSetIterator;
 import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.packed.PackedInts;
 import org.apache.lucene.util.packed.PackedLongValues;
@@ -31,10 +34,11 @@ import org.apache.lucene.util.packed.PackedLongValues;
 class SortedNumericDocValuesWriter extends DocValuesWriter {
   private PackedLongValues.Builder pending; // stream of all values
   private PackedLongValues.Builder pendingCounts; // count of values per doc
+  private FixedBitSet docsWithField;
   private final Counter iwBytesUsed;
   private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts'
   private final FieldInfo fieldInfo;
-  private int currentDoc;
+  private int currentDoc = -1;
   private long currentValues[] = new long[8];
   private int currentUpto = 0;
 
@@ -43,19 +47,16 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
     this.iwBytesUsed = iwBytesUsed;
     pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
     pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
-    bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed();
+    docsWithField = new FixedBitSet(64);
+    bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed() + docsWithField.ramBytesUsed() + RamUsageEstimator.sizeOf(currentValues);
     iwBytesUsed.addAndGet(bytesUsed);
   }
 
-  public void addValue(int docID, long value) {    
+  public void addValue(int docID, long value) {
+    assert docID >= currentDoc;
     if (docID != currentDoc) {
       finishCurrentDoc();
-    }
-
-    // Fill in any holes:
-    while(currentDoc < docID) {
-      pendingCounts.add(0); // no values
-      currentDoc++;
+      currentDoc = docID;
     }
 
     addOneValue(value);
@@ -64,6 +65,9 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
   
   // finalize currentDoc: this sorts the values in the current doc
   private void finishCurrentDoc() {
+    if (currentDoc == -1) {
+      return;
+    }
     Arrays.sort(currentValues, 0, currentUpto);
     for (int i = 0; i < currentUpto; i++) {
       pending.add(currentValues[i]);
@@ -71,17 +75,14 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
     // record the number of values for this doc
     pendingCounts.add(currentUpto);
     currentUpto = 0;
-    currentDoc++;
+
+    docsWithField = FixedBitSet.ensureCapacity(docsWithField, currentDoc);
+    docsWithField.set(currentDoc);
   }
 
   @Override
   public void finish(int maxDoc) {
     finishCurrentDoc();
-    
-    // fill in any holes
-    for (int i = currentDoc; i < maxDoc; i++) {
-      pendingCounts.add(0); // no values
-    }
   }
 
   private void addOneValue(long value) {
@@ -94,15 +95,13 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
   }
   
   private void updateBytesUsed() {
-    final long newBytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed() + RamUsageEstimator.sizeOf(currentValues);
+    final long newBytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed() + docsWithField.ramBytesUsed() + RamUsageEstimator.sizeOf(currentValues);
     iwBytesUsed.addAndGet(newBytesUsed - bytesUsed);
     bytesUsed = newBytesUsed;
   }
 
   @Override
   public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
-    final int maxDoc = state.segmentInfo.maxDoc();
-    assert pendingCounts.size() == maxDoc;
     final PackedLongValues values = pending.build();
     final PackedLongValues valueCounts = pendingCounts.build();
 
@@ -113,7 +112,7 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
                                          if (fieldInfoIn != fieldInfo) {
                                            throw new IllegalArgumentException("wrong fieldInfo");
                                          }
-                                         return new BufferedSortedNumericDocValues(values, valueCounts);
+                                         return new BufferedSortedNumericDocValues(values, valueCounts, docsWithField);
                                        }
                                      });
   }
@@ -121,45 +120,31 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
   private static class BufferedSortedNumericDocValues extends SortedNumericDocValues {
     final PackedLongValues.Iterator valuesIter;
     final PackedLongValues.Iterator valueCountsIter;
-    final int maxDoc;
-    final long cost;
-    private int docID = -1;
+    final DocIdSetIterator docsWithField;
     private int valueCount;
     private int valueUpto;
 
-    public BufferedSortedNumericDocValues(PackedLongValues values, PackedLongValues valueCounts) {
+    public BufferedSortedNumericDocValues(PackedLongValues values, PackedLongValues valueCounts, FixedBitSet docsWithField) {
       valuesIter = values.iterator();
       valueCountsIter = valueCounts.iterator();
-      maxDoc = Math.toIntExact(valueCounts.size());
-      cost = values.size();
+      this.docsWithField = new BitSetIterator(docsWithField, values.size());
     }
 
     @Override
     public int docID() {
-      return docID;
+      return docsWithField.docID();
     }
 
     @Override
-    public int nextDoc() {
-
-      // consume any un-consumed values from current doc
-      while(valueUpto < valueCount) {
+    public int nextDoc() throws IOException {
+      for (int i = valueUpto; i < valueCount; ++i) {
         valuesIter.next();
-        valueUpto++;
       }
-      
-      while (true) {
-        docID++;
-        if (docID == maxDoc) {
-          docID = NO_MORE_DOCS;
-          break;
-        } else {
-          valueCount = Math.toIntExact(valueCountsIter.next());
-          if (valueCount > 0) {
-            valueUpto = 0;
-            break;
-          }
-        }
+
+      int docID = docsWithField.nextDoc();
+      if (docID != NO_MORE_DOCS) {
+        valueCount = Math.toIntExact(valueCountsIter.next());
+        valueUpto = 0;
       }
       return docID;
     }
@@ -176,13 +161,16 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
 
     @Override
     public long nextValue() {
+      if (valueUpto == valueCount) {
+        throw new IllegalStateException();
+      }
       valueUpto++;
       return valuesIter.next();
     }
-    
+
     @Override
     public long cost() {
-      return cost;
+      return docsWithField.cost();
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d50cf976/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
index b474d86..e7d915f 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
@@ -22,12 +22,15 @@ import java.io.IOException;
 import java.util.Arrays;
 
 import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BitSetIterator;
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
 import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.packed.PackedInts;
 import org.apache.lucene.util.packed.PackedLongValues;
 
@@ -37,10 +40,11 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
   final BytesRefHash hash;
   private PackedLongValues.Builder pending; // stream of all termIDs
   private PackedLongValues.Builder pendingCounts; // termIDs per doc
+  private FixedBitSet docsWithField;
   private final Counter iwBytesUsed;
   private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts'
   private final FieldInfo fieldInfo;
-  private int currentDoc;
+  private int currentDoc = -1;
   private int currentValues[] = new int[8];
   private int currentUpto;
   private int maxCount;
@@ -55,26 +59,23 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
             new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
     pending = PackedLongValues.packedBuilder(PackedInts.COMPACT);
     pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
+    docsWithField = new FixedBitSet(64);
     bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed();
     iwBytesUsed.addAndGet(bytesUsed);
   }
 
   public void addValue(int docID, BytesRef value) {
+    assert docID >= currentDoc;
     if (value == null) {
       throw new IllegalArgumentException("field \"" + fieldInfo.name + "\": null value not allowed");
     }
     if (value.length > (BYTE_BLOCK_SIZE - 2)) {
       throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" is too large, must be <= " + (BYTE_BLOCK_SIZE - 2));
     }
-    
+
     if (docID != currentDoc) {
       finishCurrentDoc();
-    }
-
-    // Fill in any holes:
-    while(currentDoc < docID) {
-      pendingCounts.add(0); // no values
-      currentDoc++;
+      currentDoc = docID;
     }
 
     addOneValue(value);
@@ -83,6 +84,9 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
   
   // finalize currentDoc: this deduplicates the current term ids
   private void finishCurrentDoc() {
+    if (currentDoc == -1) {
+      return;
+    }
     Arrays.sort(currentValues, 0, currentUpto);
     int lastValue = -1;
     int count = 0;
@@ -99,17 +103,13 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
     pendingCounts.add(count);
     maxCount = Math.max(maxCount, count);
     currentUpto = 0;
-    currentDoc++;
+    docsWithField = FixedBitSet.ensureCapacity(docsWithField, currentDoc);
+    docsWithField.set(currentDoc);
   }
 
   @Override
   public void finish(int maxDoc) {
     finishCurrentDoc();
-    
-    // fill in any holes
-    for (int i = currentDoc; i < maxDoc; i++) {
-      pendingCounts.add(0); // no values
-    }
   }
 
   private void addOneValue(BytesRef value) {
@@ -141,9 +141,6 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
 
   @Override
   public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
-    final int maxDoc = state.segmentInfo.maxDoc();
-    final int maxCountPerDoc = maxCount;
-    assert pendingCounts.size() == maxDoc;
     final int valueCount = hash.size();
     final PackedLongValues ords = pending.build();
     final PackedLongValues ordCounts = pendingCounts.build();
@@ -161,66 +158,51 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
                                      if (fieldInfoIn != fieldInfo) {
                                        throw new IllegalArgumentException("wrong fieldInfo");
                                      }
-                                     return new BufferedSortedSetDocValues(sortedValues, ordMap, hash, ords, ordCounts, maxCount);
+                                     return new BufferedSortedSetDocValues(sortedValues, ordMap, hash, ords, ordCounts, maxCount, docsWithField);
                                    }
                                  });
   }
 
   private static class BufferedSortedSetDocValues extends SortedSetDocValues {
-    private int docID = -1;
     final int[] sortedValues;
     final int[] ordMap;
     final BytesRefHash hash;
-    final PackedLongValues ords;
-    final PackedLongValues ordCounts;
     final BytesRef scratch = new BytesRef();
     final PackedLongValues.Iterator ordsIter;
     final PackedLongValues.Iterator ordCountsIter;
+    final DocIdSetIterator docsWithField;
     final int currentDoc[];
     
     private int ordCount;
     private int ordUpto;
 
-    public BufferedSortedSetDocValues(int[] sortedValues, int[] ordMap, BytesRefHash hash, PackedLongValues ords, PackedLongValues ordCounts, int maxCount) {
+    public BufferedSortedSetDocValues(int[] sortedValues, int[] ordMap, BytesRefHash hash, PackedLongValues ords, PackedLongValues ordCounts, int maxCount, FixedBitSet docsWithField) {
       this.currentDoc = new int[maxCount];
       this.sortedValues = sortedValues;
       this.ordMap = ordMap;
       this.hash = hash;
-      this.ords = ords;
-      this.ordCounts = ordCounts;
       this.ordsIter = ords.iterator();
       this.ordCountsIter = ordCounts.iterator();
+      this.docsWithField = new BitSetIterator(docsWithField, ordCounts.size());
     }
 
     @Override
     public int docID() {
-      return docID;
+      return docsWithField.docID();
     }
 
     @Override
-    public int nextDoc() {
-      // consume any un-consumed ords from current doc
-      while (ordUpto < ordCount) {
-        ordsIter.next();
-        ordUpto++;
-      }
-      while (true) {
-        docID++;
-        if (docID == ordCounts.size()) {
-          docID = NO_MORE_DOCS;
-          break;
-        }
+    public int nextDoc() throws IOException {
+      int docID = docsWithField.nextDoc();
+      if (docID != NO_MORE_DOCS) {
         ordCount = (int) ordCountsIter.next();
-        if (ordCount > 0) {
-          for(int i=0;i<ordCount;i++) {
-            currentDoc[i] = ordMap[Math.toIntExact(ordsIter.next())];
-          }
-          Arrays.sort(currentDoc, 0, ordCount);          
-          ordUpto = 0;
-          break;
+        assert ordCount > 0;
+        for (int i = 0; i < ordCount; i++) {
+          currentDoc[i] = ordMap[Math.toIntExact(ordsIter.next())];
         }
+        Arrays.sort(currentDoc, 0, ordCount);          
+        ordUpto = 0;
       }
-
       return docID;
     }
 
@@ -235,7 +217,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
 
     @Override
     public long cost() {
-      return ordCounts.size();
+      return docsWithField.cost();
     }
 
     @Override


Mime
View raw message