lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject lucene-solr git commit: LUCENE-7098: reduce OfflineSorter and BKDWriter IO by using 4 bytes instead of 8 bytes to encord ord in the common case
Date Sat, 12 Mar 2016 10:16:37 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6x da67b5627 -> a5b941baa


LUCENE-7098: reduce OfflineSorter and BKDWriter IO by using 4 bytes instead of 8 bytes to
encord ord in the common case

Squashed commit of the following:

commit 5ac2dcf2a972e46ccda3e7a8d8df5d0af58f712a
Merge: 68acf7f 684b222
Author: Mike McCandless <mikemccand@apache.org>
Date:   Fri Mar 11 19:04:13 2016 -0500

    Merge branch 'master' into intords

commit 68acf7f9ee2e0249d90075bc035721c0a91619f7
Author: Mike McCandless <mikemccand@apache.org>
Date:   Fri Mar 11 19:04:01 2016 -0500

    rename to totalPointCount and add comment; enforce that the caller doesn't exceed what
they said; simplify the longOrds check to just compare to Integer.MAX_VALUE

commit afc964b56015475e8c354fdc8b0e05c7fa074ec2
Merge: db79e36 fe21f7a
Author: Mike McCandless <mikemccand@apache.org>
Date:   Fri Mar 11 10:17:09 2016 -0500

    Merge branch 'master' into intords

    Conflicts:
    	lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java

commit db79e365e097153a05813eaa70603c601bce1853
Author: Mike McCandless <mikemccand@apache.org>
Date:   Fri Mar 11 10:15:05 2016 -0500

    use int (4 bytes) not lon (8 bytes) if the number of points is less than ~2.1B


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/a5b941ba
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/a5b941ba
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/a5b941ba

Branch: refs/heads/branch_6x
Commit: a5b941baa1aa907886760a247227ecf2cbc3eaa1
Parents: da67b56
Author: Mike McCandless <mikemccand@apache.org>
Authored: Sat Mar 12 05:11:51 2016 -0500
Committer: Mike McCandless <mikemccand@apache.org>
Committed: Sat Mar 12 05:14:38 2016 -0500

----------------------------------------------------------------------
 .../simpletext/SimpleTextPointsWriter.java      |  3 +-
 .../org/apache/lucene/codecs/PointsWriter.java  | 14 +++-
 .../codecs/lucene60/Lucene60PointsWriter.java   | 20 ++++-
 .../apache/lucene/index/PointValuesWriter.java  | 16 ++--
 .../org/apache/lucene/util/bkd/BKDWriter.java   | 83 +++++++++++++-------
 .../apache/lucene/util/bkd/HeapPointReader.java | 12 ++-
 .../apache/lucene/util/bkd/HeapPointWriter.java | 37 +++++++--
 .../lucene/util/bkd/OfflinePointReader.java     | 24 ++++--
 .../lucene/util/bkd/OfflinePointWriter.java     | 17 +++-
 .../org/apache/lucene/util/bkd/TestBKD.java     | 22 ++++--
 10 files changed, 178 insertions(+), 70 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a5b941ba/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java
----------------------------------------------------------------------
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java
b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java
index d2b848d..13494f5 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java
@@ -76,7 +76,8 @@ class SimpleTextPointsWriter extends PointsWriter {
                                           fieldInfo.getPointDimensionCount(),
                                           fieldInfo.getPointNumBytes(),
                                           BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE,
-                                          BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP) {
+                                          BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP,
+                                          values.size(fieldInfo.name)) {
 
         @Override
         protected void writeIndex(IndexOutput out, long[] leafBlockFPs, byte[] splitPackedValues)
throws IOException {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a5b941ba/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java
index 53db281..000d713 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java
@@ -41,6 +41,17 @@ public abstract class PointsWriter implements Closeable {
    *  from the incoming segment.  The default codec overrides this for 1D fields and uses
    *  a faster but more complex implementation. */
   protected void mergeOneField(MergeState mergeState, FieldInfo fieldInfo) throws IOException
{
+    long maxPointCount = 0;
+    for (int i=0;i<mergeState.pointsReaders.length;i++) {
+      PointsReader pointsReader = mergeState.pointsReaders[i];
+      if (pointsReader != null) {
+        FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldInfo.name);
+        if (readerFieldInfo != null) {
+          maxPointCount += pointsReader.size(fieldInfo.name);
+        }
+      }
+    }
+    final long finalMaxPointCount = maxPointCount;
     writeField(fieldInfo,
                new PointsReader() {
                  @Override
@@ -48,6 +59,7 @@ public abstract class PointsWriter implements Closeable {
                    if (fieldName.equals(fieldInfo.name) == false) {
                      throw new IllegalArgumentException("field name must match the field
being merged");
                    }
+                   
                    for (int i=0;i<mergeState.pointsReaders.length;i++) {
                      PointsReader pointsReader = mergeState.pointsReaders[i];
                      if (pointsReader == null) {
@@ -124,7 +136,7 @@ public abstract class PointsWriter implements Closeable {
 
                  @Override
                  public long size(String fieldName) {
-                   throw new UnsupportedOperationException();
+                   return finalMaxPointCount;
                  }
 
                  @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a5b941ba/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java
b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java
index 3d09c45..1148fb0 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java
@@ -88,7 +88,8 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable
{
                                           fieldInfo.getPointDimensionCount(),
                                           fieldInfo.getPointNumBytes(),
                                           maxPointsInLeafNode,
-                                          maxMBSortInHeap)) {
+                                          maxMBSortInHeap,
+                                          values.size(fieldInfo.name))) {
 
       values.intersect(fieldInfo.name, new IntersectVisitor() {
           @Override
@@ -131,6 +132,20 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable
{
     for (FieldInfo fieldInfo : mergeState.mergeFieldInfos) {
       if (fieldInfo.getPointDimensionCount() != 0) {
         if (fieldInfo.getPointDimensionCount() == 1) {
+
+          // Worst case total maximum size (if none of the points are deleted):
+          long totMaxSize = 0;
+          for(int i=0;i<mergeState.pointsReaders.length;i++) {
+            PointsReader reader = mergeState.pointsReaders[i];
+            if (reader != null) {
+              FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
+              FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
+              if (readerFieldInfo != null) {
+                totMaxSize += reader.size(fieldInfo.name);
+              }
+            }
+          }
+
           //System.out.println("MERGE: field=" + fieldInfo.name);
           // Optimize the 1D case to use BKDWriter.merge, which does a single merge sort
of the
           // already sorted incoming segments, instead of trying to sort all points again
as if
@@ -141,7 +156,8 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable
{
                                                 fieldInfo.getPointDimensionCount(),
                                                 fieldInfo.getPointNumBytes(),
                                                 maxPointsInLeafNode,
-                                                maxMBSortInHeap)) {
+                                                maxMBSortInHeap,
+                                                totMaxSize)) {
             List<BKDReader> bkdReaders = new ArrayList<>();
             List<MergeState.DocMap> docMaps = new ArrayList<>();
             List<Integer> docIDBases = new ArrayList<>();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a5b941ba/lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java
index 35b9a90..694ccf6 100644
--- a/lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java
@@ -31,7 +31,7 @@ class PointValuesWriter {
   private final ByteBlockPool bytes;
   private final Counter iwBytesUsed;
   private int[] docIDs;
-  private int numDocs;
+  private int numPoints;
   private final byte[] packedValue;
 
   public PointValuesWriter(DocumentsWriterPerThread docWriter, FieldInfo fieldInfo) {
@@ -51,13 +51,13 @@ class PointValuesWriter {
     if (value.length != fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes())
{
       throw new IllegalArgumentException("field=" + fieldInfo.name + ": this field's value
has length=" + value.length + " but should be " + (fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes()));
     }
-    if (docIDs.length == numDocs) {
-      docIDs = ArrayUtil.grow(docIDs, numDocs+1);
-      iwBytesUsed.addAndGet((docIDs.length - numDocs) * Integer.BYTES);
+    if (docIDs.length == numPoints) {
+      docIDs = ArrayUtil.grow(docIDs, numPoints+1);
+      iwBytesUsed.addAndGet((docIDs.length - numPoints) * Integer.BYTES);
     }
     bytes.append(value);
-    docIDs[numDocs] = docID;
-    numDocs++;
+    docIDs[numPoints] = docID;
+    numPoints++;
   }
 
   public void flush(SegmentWriteState state, PointsWriter writer) throws IOException {
@@ -69,7 +69,7 @@ class PointValuesWriter {
                           if (fieldName.equals(fieldInfo.name) == false) {
                             throw new IllegalArgumentException("fieldName must be the same");
                           }
-                          for(int i=0;i<numDocs;i++) {
+                          for(int i=0;i<numPoints;i++) {
                             bytes.readBytes(packedValue.length * i, packedValue, 0, packedValue.length);
                             visitor.visit(docIDs[i], packedValue);
                           }
@@ -111,7 +111,7 @@ class PointValuesWriter {
 
                         @Override
                         public long size(String fieldName) {
-                          throw new UnsupportedOperationException();
+                          return numPoints;
                         }
 
                         @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a5b941ba/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
index f1aba9d..bb2402b 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -128,12 +128,18 @@ public class BKDWriter implements Closeable {
 
   protected long pointCount;
 
-  public BKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims,
int bytesPerDim) throws IOException {
-    this(maxDoc, tempDir, tempFileNamePrefix, numDims, bytesPerDim, DEFAULT_MAX_POINTS_IN_LEAF_NODE,
DEFAULT_MAX_MB_SORT_IN_HEAP);
+  /** true if we have so many values that we must write ords using long (8 bytes) instead
of int (4 bytes) */
+  private final boolean longOrds;
+
+  /** An upper bound on how many points the caller will add (includes deletions) */
+  private final long totalPointCount;
+
+  public BKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims,
int bytesPerDim, long totalPointCount) throws IOException {
+    this(maxDoc, tempDir, tempFileNamePrefix, numDims, bytesPerDim, DEFAULT_MAX_POINTS_IN_LEAF_NODE,
DEFAULT_MAX_MB_SORT_IN_HEAP, totalPointCount);
   }
 
-  public BKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims,
int bytesPerDim, int maxPointsInLeafNode, double maxMBSortInHeap) throws IOException {
-    verifyParams(numDims, maxPointsInLeafNode, maxMBSortInHeap);
+  public BKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims,
int bytesPerDim, int maxPointsInLeafNode, double maxMBSortInHeap, long totalPointCount) throws
IOException {
+    verifyParams(numDims, maxPointsInLeafNode, maxMBSortInHeap, totalPointCount);
     // We use tracking dir to deal with removing files on exception, so each place that
     // creates temp files doesn't need crazy try/finally/sucess logic:
     this.tempDir = new TrackingDirectoryWrapper(tempDir);
@@ -141,6 +147,7 @@ public class BKDWriter implements Closeable {
     this.maxPointsInLeafNode = maxPointsInLeafNode;
     this.numDims = numDims;
     this.bytesPerDim = bytesPerDim;
+    this.totalPointCount = totalPointCount;
     docsSeen = new FixedBitSet(maxDoc);
     packedBytesLength = numDims * bytesPerDim;
 
@@ -153,8 +160,15 @@ public class BKDWriter implements Closeable {
     minPackedValue = new byte[packedBytesLength];
     maxPackedValue = new byte[packedBytesLength];
 
-    // dimensional values (numDims * bytesPerDim) + ord (long) + docID (int)
-    bytesPerDoc = packedBytesLength + Long.BYTES + Integer.BYTES;
+    // If we may have more than 1+Integer.MAX_VALUE values, then we must encode ords with
long (8 bytes), else we can use int (4 bytes).
+    longOrds = totalPointCount > Integer.MAX_VALUE;
+
+    // dimensional values (numDims * bytesPerDim) + ord (int or long) + docID (int)
+    if (longOrds) {
+      bytesPerDoc = packedBytesLength + Long.BYTES + Integer.BYTES;
+    } else {
+      bytesPerDoc = packedBytesLength + Integer.BYTES + Integer.BYTES;
+    }
 
     // As we recurse, we compute temporary partitions of the data, halving the
     // number of points at each recursion.  Once there are few enough points,
@@ -173,12 +187,12 @@ public class BKDWriter implements Closeable {
     }
 
     // We write first maxPointsSortInHeap in heap, then cutover to offline for additional
points:
-    heapPointWriter = new HeapPointWriter(16, maxPointsSortInHeap, packedBytesLength);
+    heapPointWriter = new HeapPointWriter(16, maxPointsSortInHeap, packedBytesLength, longOrds);
 
     this.maxMBSortInHeap = maxMBSortInHeap;
   }
 
-  public static void verifyParams(int numDims, int maxPointsInLeafNode, double maxMBSortInHeap)
{
+  public static void verifyParams(int numDims, int maxPointsInLeafNode, double maxMBSortInHeap,
long totalPointCount) {
     // We encode dim in a single byte in the splitPackedValues, but we only expose 4 bits
for it now, in case we want to use
     // remaining 4 bits for another purpose later
     if (numDims < 1 || numDims > MAX_DIMS) {
@@ -193,13 +207,16 @@ public class BKDWriter implements Closeable {
     if (maxMBSortInHeap < 0.0) {
       throw new IllegalArgumentException("maxMBSortInHeap must be >= 0.0 (got: " + maxMBSortInHeap
+ ")");
     }
+    if (totalPointCount < 0) {
+      throw new IllegalArgumentException("totalPointCount must be >=0 (got: " + totalPointCount
+ ")");
+    }
   }
 
   /** If the current segment has too many points then we switchover to temp files / offline
sort. */
   private void switchToOffline() throws IOException {
 
     // For each .add we just append to this input file, then in .finish we sort this input
and resursively build the tree:
-    offlinePointWriter = new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength);
+    offlinePointWriter = new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength,
longOrds);
     tempInput = offlinePointWriter.out;
     PointReader reader = heapPointWriter.getReader(0);
     for(int i=0;i<pointCount;i++) {
@@ -243,6 +260,9 @@ public class BKDWriter implements Closeable {
     }
 
     pointCount++;
+    if (pointCount > totalPointCount) {
+      throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed
when we were created, but we just hit " + pointCount + " values");
+    }
     docsSeen.set(docID);
   }
 
@@ -437,6 +457,9 @@ public class BKDWriter implements Closeable {
 
       assert numDims > 1 || valueInOrder(valueCount, lastPackedValue, reader.state.scratchPackedValue);
       valueCount++;
+      if (pointCount > totalPointCount) {
+        throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed
when we were created, but we just hit " + pointCount + " values");
+      }
 
       if (leafCount == 0) {
         if (leafBlockFPs.size() > 0) {
@@ -569,13 +592,10 @@ public class BKDWriter implements Closeable {
     new IntroSorter() {
       private final byte[] pivotPackedValue = new byte[bytesPerDim];
       private int pivotDocID;
-      private long pivotOrd;
 
       @Override
       protected void setPivot(int i) {
         pivotDocID = writer.docIDs[i];
-        pivotOrd = writer.ords[i];
-
         int block = i / writer.valuesPerBlock;
         int index = i % writer.valuesPerBlock;
         System.arraycopy(writer.blocks.get(block), index*packedBytesLength+dim*bytesPerDim,
pivotPackedValue, 0, bytesPerDim);
@@ -593,12 +613,7 @@ public class BKDWriter implements Closeable {
         }
 
         // Tie-break
-        cmp = Integer.compare(pivotDocID, writer.docIDs[j]);
-        if (cmp != 0) {
-          return cmp;
-        }
-
-        return Long.compare(pivotOrd, writer.ords[j]);
+        return Integer.compare(pivotDocID, writer.docIDs[j]);
       }
 
       @Override
@@ -607,9 +622,15 @@ public class BKDWriter implements Closeable {
         writer.docIDs[i] = writer.docIDs[j];
         writer.docIDs[j] = docID;
 
-        long ord = writer.ords[i];
-        writer.ords[i] = writer.ords[j];
-        writer.ords[j] = ord;
+        if (longOrds) {
+          long ord = writer.ordsLong[i];
+          writer.ordsLong[i] = writer.ordsLong[j];
+          writer.ordsLong[j] = ord;
+        } else {
+          int ord = writer.ords[i];
+          writer.ords[i] = writer.ords[j];
+          writer.ords[j] = ord;
+        }
 
         byte[] blockI = writer.blocks.get(i / writer.valuesPerBlock);
         int indexI = (i % writer.valuesPerBlock) * packedBytesLength;
@@ -660,7 +681,7 @@ public class BKDWriter implements Closeable {
         sorted = heapPointWriter;
       } else {
         // Subsequent dims need a private copy
-        sorted = new HeapPointWriter((int) pointCount, (int) pointCount, packedBytesLength);
+        sorted = new HeapPointWriter((int) pointCount, (int) pointCount, packedBytesLength,
longOrds);
         sorted.copyFrom(heapPointWriter);
       }
 
@@ -691,10 +712,16 @@ public class BKDWriter implements Closeable {
           }
 
           // Tie-break by docID:
-          reader.reset(a.bytes, a.offset + packedBytesLength + Long.BYTES, a.length);
+          int offset;
+          if (longOrds) {
+            offset = Long.BYTES;
+          } else {
+            offset = Integer.BYTES;
+          }
+          reader.reset(a.bytes, a.offset + packedBytesLength + offset, a.length);
           final int docIDA = reader.readInt();
 
-          reader.reset(b.bytes, b.offset + packedBytesLength + Long.BYTES, b.length);
+          reader.reset(b.bytes, b.offset + packedBytesLength + offset, b.length);
           final int docIDB = reader.readInt();
 
           // No need to tie break on ord, for the case where the same doc has the same value
in a given dimension indexed more than once: it
@@ -746,7 +773,7 @@ public class BKDWriter implements Closeable {
 
       assert lastWriter[0] != null;
 
-      return new OfflinePointWriter(tempDir, lastWriter[0], packedBytesLength, pointCount);
+      return new OfflinePointWriter(tempDir, lastWriter[0], packedBytesLength, pointCount,
longOrds);
     }
   }
 
@@ -1005,7 +1032,7 @@ public class BKDWriter implements Closeable {
   private PathSlice switchToHeap(PathSlice source) throws IOException {
     int count = Math.toIntExact(source.count);
     try (
-       PointWriter writer = new HeapPointWriter(count, count, packedBytesLength);
+       PointWriter writer = new HeapPointWriter(count, count, packedBytesLength, longOrds);
        PointReader reader = source.writer.getReader(source.start);
        ) {
       for(int i=0;i<count;i++) {
@@ -1214,9 +1241,9 @@ public class BKDWriter implements Closeable {
   PointWriter getPointWriter(long count) throws IOException {
     if (count <= maxPointsSortInHeap) {
       int size = Math.toIntExact(count);
-      return new HeapPointWriter(size, size, packedBytesLength);
+      return new HeapPointWriter(size, size, packedBytesLength, longOrds);
     } else {
-      return new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength);
+      return new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds);
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a5b941ba/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java
index 081360b..b178f08 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java
@@ -26,15 +26,17 @@ final class HeapPointReader implements PointReader {
   final List<byte[]> blocks;
   final int valuesPerBlock;
   final int packedBytesLength;
-  final long[] ords;
+  final long[] ordsLong;
+  final int[] ords;
   final int[] docIDs;
   final int end;
   final byte[] scratch;
 
-  HeapPointReader(List<byte[]> blocks, int valuesPerBlock, int packedBytesLength, long[]
ords, int[] docIDs, int start, int end) {
+  HeapPointReader(List<byte[]> blocks, int valuesPerBlock, int packedBytesLength, int[]
ords, long[] ordsLong, int[] docIDs, int start, int end) {
     this.blocks = blocks;
     this.valuesPerBlock = valuesPerBlock;
     this.ords = ords;
+    this.ordsLong = ordsLong;
     this.docIDs = docIDs;
     curRead = start-1;
     this.end = end;
@@ -76,7 +78,11 @@ final class HeapPointReader implements PointReader {
 
   @Override
   public long ord() {
-    return ords[curRead];
+    if (ordsLong != null) {
+      return ordsLong[curRead];
+    } else {
+      return ords[curRead];
+    }
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a5b941ba/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java
index 0236187..3b043d0 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java
@@ -23,7 +23,8 @@ import org.apache.lucene.util.ArrayUtil;
 
 final class HeapPointWriter implements PointWriter {
   int[] docIDs;
-  long[] ords;
+  long[] ordsLong;
+  int[] ords;
   private int nextWrite;
   private boolean closed;
   final int maxSize;
@@ -32,11 +33,15 @@ final class HeapPointWriter implements PointWriter {
   // NOTE: can't use ByteBlockPool because we need random-write access when sorting in heap
   final List<byte[]> blocks = new ArrayList<>();
 
-  public HeapPointWriter(int initSize, int maxSize, int packedBytesLength) {
+  public HeapPointWriter(int initSize, int maxSize, int packedBytesLength, boolean longOrds)
{
     docIDs = new int[initSize];
-    ords = new long[initSize];
     this.maxSize = maxSize;
     this.packedBytesLength = packedBytesLength;
+    if (longOrds) {
+      this.ordsLong = new long[initSize];
+    } else {
+      this.ords = new int[initSize];
+    }
     // 4K per page, unless each value is > 4K:
     valuesPerBlock = Math.max(1, 4096/packedBytesLength);
   }
@@ -46,7 +51,14 @@ final class HeapPointWriter implements PointWriter {
       throw new IllegalStateException("docIDs.length=" + docIDs.length + " other.nextWrite="
+ other.nextWrite);
     }
     System.arraycopy(other.docIDs, 0, docIDs, 0, other.nextWrite);
-    System.arraycopy(other.ords, 0, ords, 0, other.nextWrite);
+    if (other.ords != null) {
+      assert this.ords != null;
+      System.arraycopy(other.ords, 0, ords, 0, other.nextWrite);
+    } else {
+      assert this.ordsLong != null;
+      System.arraycopy(other.ordsLong, 0, ordsLong, 0, other.nextWrite);
+    }
+
     for(byte[] block : other.blocks) {
       blocks.add(block.clone());
     }
@@ -91,21 +103,30 @@ final class HeapPointWriter implements PointWriter {
   public void append(byte[] packedValue, long ord, int docID) {
     assert closed == false;
     assert packedValue.length == packedBytesLength;
-    if (ords.length == nextWrite) {
+    if (docIDs.length == nextWrite) {
       int nextSize = Math.min(maxSize, ArrayUtil.oversize(nextWrite+1, Integer.BYTES));
       assert nextSize > nextWrite: "nextSize=" + nextSize + " vs nextWrite=" + nextWrite;
-      ords = growExact(ords, nextSize);
       docIDs = growExact(docIDs, nextSize);
+      if (ordsLong != null) {
+        ordsLong = growExact(ordsLong, nextSize);
+      } else {
+        ords = growExact(ords, nextSize);
+      }
     }
     writePackedValue(nextWrite, packedValue);
-    ords[nextWrite] = ord;
+    if (ordsLong != null) {
+      ordsLong[nextWrite] = ord;
+    } else {
+      assert ord <= Integer.MAX_VALUE;
+      ords[nextWrite] = (int) ord;
+    }
     docIDs[nextWrite] = docID;
     nextWrite++;
   }
 
   @Override
   public PointReader getReader(long start) {
-    return new HeapPointReader(blocks, valuesPerBlock, packedBytesLength, ords, docIDs, (int)
start, nextWrite);
+    return new HeapPointReader(blocks, valuesPerBlock, packedBytesLength, ords, ordsLong,
docIDs, (int) start, nextWrite);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a5b941ba/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
index 83d863b..3c4b8b5 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
@@ -30,18 +30,22 @@ final class OfflinePointReader implements PointReader {
   private final byte[] packedValue;
   private long ord;
   private int docID;
+  // true if ords are written as long (8 bytes), else 4 bytes
+  private boolean longOrds;
 
-  OfflinePointReader(Directory tempDir, String tempFileName, int packedBytesLength, long
start, long length) throws IOException {
-    this(tempDir.openInput(tempFileName, IOContext.READONCE), packedBytesLength, start, length);
-  }
-
-  private OfflinePointReader(IndexInput in, int packedBytesLength, long start, long length)
throws IOException {
-    this.in = in;
-    int bytesPerDoc = packedBytesLength + Long.BYTES + Integer.BYTES;
+  OfflinePointReader(Directory tempDir, String tempFileName, int packedBytesLength, long
start, long length, boolean longOrds) throws IOException {
+    in = tempDir.openInput(tempFileName, IOContext.READONCE);
+    int bytesPerDoc = packedBytesLength + Integer.BYTES;
+    if (longOrds) {
+      bytesPerDoc += Long.BYTES;
+    } else {
+      bytesPerDoc += Integer.BYTES;
+    }
     long seekFP = start * bytesPerDoc;
     in.seek(seekFP);
     this.countLeft = length;
     packedValue = new byte[packedBytesLength];
+    this.longOrds = longOrds;
   }
 
   @Override
@@ -58,7 +62,11 @@ final class OfflinePointReader implements PointReader {
       assert countLeft == -1;
       return false;
     }
-    ord = in.readLong();
+    if (longOrds) {
+      ord = in.readLong();
+    } else {
+      ord = in.readInt();
+    }
     docID = in.readInt();
     return true;
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a5b941ba/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
index 625e6fa..dcf6781 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
@@ -30,27 +30,36 @@ final class OfflinePointWriter implements PointWriter {
   final int packedBytesLength;
   private long count;
   private boolean closed;
+  // true if ords are written as long (8 bytes), else 4 bytes
+  private boolean longOrds;
 
-  public OfflinePointWriter(Directory tempDir, String tempFileNamePrefix, int packedBytesLength)
throws IOException {
+  public OfflinePointWriter(Directory tempDir, String tempFileNamePrefix, int packedBytesLength,
boolean longOrds) throws IOException {
     this.out = tempDir.createTempOutput(tempFileNamePrefix, "bkd", IOContext.DEFAULT);
     this.tempDir = tempDir;
     this.packedBytesLength = packedBytesLength;
+    this.longOrds = longOrds;
   }
 
   /** Initializes on an already written/closed file, just so consumers can use {@link #getReader}
to read the file. */
-  public OfflinePointWriter(Directory tempDir, IndexOutput out, int packedBytesLength, long
count) {
+  public OfflinePointWriter(Directory tempDir, IndexOutput out, int packedBytesLength, long
count, boolean longOrds) {
     this.out = out;
     this.tempDir = tempDir;
     this.packedBytesLength = packedBytesLength;
     this.count = count;
     closed = true;
+    this.longOrds = longOrds;
   }
     
   @Override
   public void append(byte[] packedValue, long ord, int docID) throws IOException {
     assert packedValue.length == packedBytesLength;
     out.writeBytes(packedValue, 0, packedValue.length);
-    out.writeLong(ord);
+    if (longOrds) {
+      out.writeLong(ord);
+    } else {
+      assert ord <= Integer.MAX_VALUE;
+      out.writeInt((int) ord);
+    }
     out.writeInt(docID);
     count++;
   }
@@ -58,7 +67,7 @@ final class OfflinePointWriter implements PointWriter {
   @Override
   public PointReader getReader(long start) throws IOException {
     assert closed;
-    return new OfflinePointReader(tempDir, out.getName(), packedBytesLength, start, count-start);
+    return new OfflinePointReader(tempDir, out.getName(), packedBytesLength, start, count-start,
longOrds);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a5b941ba/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
index f1402fc..2017743 100644
--- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
@@ -40,9 +40,17 @@ import org.apache.lucene.util.TestUtil;
 
 public class TestBKD extends LuceneTestCase {
 
+  private long randomPointCount() {
+    if (random().nextBoolean()) {
+      return random().nextInt(Integer.MAX_VALUE);
+    } else {
+      return random().nextLong() & Long.MAX_VALUE;
+    }
+  }
+
   public void testBasicInts1D() throws Exception {
     try (Directory dir = getDirectory(100)) {
-      BKDWriter w = new BKDWriter(100, dir, "tmp", 1, 4, 2, 1.0f);
+        BKDWriter w = new BKDWriter(100, dir, "tmp", 1, 4, 2, 1.0f, randomPointCount());
       byte[] scratch = new byte[4];
       for(int docID=0;docID<100;docID++) {
         NumericUtils.intToSortableBytes(docID, scratch, 0);
@@ -117,7 +125,7 @@ public class TestBKD extends LuceneTestCase {
       int numDims = TestUtil.nextInt(random(), 1, 5);
       int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 100);
       float maxMB = (float) 3.0 + (3*random().nextFloat());
-      BKDWriter w = new BKDWriter(numDocs, dir, "tmp", numDims, 4, maxPointsInLeafNode, maxMB);
+      BKDWriter w = new BKDWriter(numDocs, dir, "tmp", numDims, 4, maxPointsInLeafNode, maxMB,
randomPointCount());
 
       if (VERBOSE) {
         System.out.println("TEST: numDims=" + numDims + " numDocs=" + numDocs);
@@ -258,7 +266,7 @@ public class TestBKD extends LuceneTestCase {
       int numDims = TestUtil.nextInt(random(), 1, 5);
       int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 100);
       float maxMB = (float) 3.0 + (3*random().nextFloat());
-      BKDWriter w = new BKDWriter(numDocs, dir, "tmp", numDims, numBytesPerDim, maxPointsInLeafNode,
maxMB);
+      BKDWriter w = new BKDWriter(numDocs, dir, "tmp", numDims, numBytesPerDim, maxPointsInLeafNode,
maxMB, randomPointCount());
       BigInteger[][] docs = new BigInteger[numDocs][];
 
       byte[] scratch = new byte[numBytesPerDim*numDims];
@@ -431,7 +439,7 @@ public class TestBKD extends LuceneTestCase {
   public void testTooLittleHeap() throws Exception { 
     try (Directory dir = getDirectory(0)) {
       IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, ()
-> {
-        new BKDWriter(1, dir, "bkd", 1, 16, 1000000, 0.001);
+        new BKDWriter(1, dir, "bkd", 1, 16, 1000000, 0.001, randomPointCount());
       });
       assertTrue(expected.getMessage().contains("either increase maxMBSortInHeap or decrease
maxPointsInLeafNode"));
     }
@@ -554,7 +562,7 @@ public class TestBKD extends LuceneTestCase {
     List<Integer> docIDBases = null;
     int seg = 0;
 
-    BKDWriter w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode,
maxMB);
+    BKDWriter w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode,
maxMB, randomPointCount());
     IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
     IndexInput in = null;
 
@@ -608,7 +616,7 @@ public class TestBKD extends LuceneTestCase {
           seg++;
           maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 1000);
           maxMB = (float) 3.0 + (3*random().nextDouble());
-          w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode,
maxMB);
+          w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode,
maxMB, randomPointCount());
           lastDocIDBase = docID;
         }
       }
@@ -623,7 +631,7 @@ public class TestBKD extends LuceneTestCase {
         out.close();
         in = dir.openInput("bkd", IOContext.DEFAULT);
         seg++;
-        w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode,
maxMB);
+        w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode,
maxMB, randomPointCount());
         List<BKDReader> readers = new ArrayList<>();
         for(long fp : toMerge) {
           in.seek(fp);


Mime
View raw message