lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r1693682 [1/2] - in /lucene/dev/trunk/lucene: ./ sandbox/src/java/org/apache/lucene/rangetree/ sandbox/src/resources/META-INF/services/ sandbox/src/test/org/apache/lucene/rangetree/
Date Sat, 01 Aug 2015 07:48:54 GMT
Author: mikemccand
Date: Sat Aug  1 07:48:53 2015
New Revision: 1693682

URL: http://svn.apache.org/r1693682
Log:
LUCENE-6697: add range tree for fast numeric and binary range filtering

Added:
    lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/
    lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/GrowingHeapSliceWriter.java   (with props)
    lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/HeapSliceReader.java   (with props)
    lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/HeapSliceWriter.java   (with props)
    lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/NumericRangeTreeQuery.java   (with props)
    lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/OfflineSliceReader.java   (with props)
    lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/OfflineSliceWriter.java   (with props)
    lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeDocValuesConsumer.java   (with props)
    lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeDocValuesFormat.java   (with props)
    lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeDocValuesProducer.java   (with props)
    lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeReader.java   (with props)
    lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeSortedNumericDocValues.java   (with props)
    lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeSortedSetDocValues.java   (with props)
    lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeWriter.java   (with props)
    lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/SliceReader.java   (with props)
    lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/SliceWriter.java   (with props)
    lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/SortedSetRangeTreeQuery.java   (with props)
    lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/package.html   (with props)
    lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/rangetree/
    lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/rangetree/TestRangeTree.java   (with props)
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/sandbox/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1693682&r1=1693681&r2=1693682&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Sat Aug  1 07:48:53 2015
@@ -141,6 +141,12 @@ New Features
 * LUCENE-6695: Added a new BlendedTermQuery to blend statistics across several
   terms. (Simon Willnauer, Adrien Grand)
 
+* LUCENE-6697: Add experimental range tree doc values format and
+  queries, based on a 1D version of the spatial BKD tree, for a faster
+  and smaller alternative to postings-based numeric and binary term
+  filtering.  Range trees can also handle values larger than 64 bits.
+  (Adrien Grand, Mike McCandless)
+
 API Changes
 
 * LUCENE-6508: Simplify Lock api, there is now just 

Added: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/GrowingHeapSliceWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/GrowingHeapSliceWriter.java?rev=1693682&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/GrowingHeapSliceWriter.java (added)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/GrowingHeapSliceWriter.java Sat Aug  1 07:48:53 2015
@@ -0,0 +1,84 @@
+package org.apache.lucene.rangetree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.RamUsageEstimator;
+
+final class GrowingHeapSliceWriter implements SliceWriter {
+  long[] values;
+  int[] docIDs;
+  long[] ords;
+  private int nextWrite;
+  final int maxSize;
+
+  public GrowingHeapSliceWriter(int maxSize) {
+    values = new long[16];
+    docIDs = new int[16];
+    ords = new long[16];
+    this.maxSize = maxSize;
+  }
+
+  private int[] growExact(int[] arr, int size) {
+    assert size > arr.length;
+    int[] newArr = new int[size];
+    System.arraycopy(arr, 0, newArr, 0, arr.length);
+    return newArr;
+  }
+
+  private long[] growExact(long[] arr, int size) {
+    assert size > arr.length;
+    long[] newArr = new long[size];
+    System.arraycopy(arr, 0, newArr, 0, arr.length);
+    return newArr;
+  }
+
+  @Override
+  public void append(long value, long ord, int docID) {
+    assert ord == nextWrite;
+    if (values.length == nextWrite) {
+      int nextSize = Math.min(maxSize, ArrayUtil.oversize(nextWrite+1, RamUsageEstimator.NUM_BYTES_INT));
+      assert nextSize > nextWrite: "nextSize=" + nextSize + " vs nextWrite=" + nextWrite;
+      values = growExact(values, nextSize);
+      ords = growExact(ords, nextSize);
+      docIDs = growExact(docIDs, nextSize);
+    }
+    values[nextWrite] = value;
+    ords[nextWrite] = ord;
+    docIDs[nextWrite] = docID;
+    nextWrite++;
+  }
+
+  @Override
+  public SliceReader getReader(long start) {
+    return new HeapSliceReader(values, ords, docIDs, (int) start, nextWrite);
+  }
+
+  @Override
+  public void close() {
+  }
+
+  @Override
+  public void destroy() {
+  }
+
+  @Override
+  public String toString() {
+    return "GrowingHeapSliceWriter(count=" + nextWrite + " alloc=" + values.length + ")";
+  }
+}

Added: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/HeapSliceReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/HeapSliceReader.java?rev=1693682&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/HeapSliceReader.java (added)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/HeapSliceReader.java Sat Aug  1 07:48:53 2015
@@ -0,0 +1,60 @@
+package org.apache.lucene.rangetree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+final class HeapSliceReader implements SliceReader {
+  private int curRead;
+  final long[] values;
+  final long[] ords;
+  final int[] docIDs;
+  final int end;
+
+  HeapSliceReader(long[] values, long[] ords, int[] docIDs, int start, int end) {
+    this.values = values;
+    this.ords = ords;
+    this.docIDs = docIDs;
+    curRead = start-1;
+    this.end = end;
+  }
+
+  @Override
+  public boolean next() {
+    curRead++;
+    return curRead < end;
+  }
+
+  @Override
+  public long value() {
+    return values[curRead];
+  }
+
+  @Override
+  public int docID() {
+    return docIDs[curRead];
+  }
+
+  @Override
+  public long ord() {
+    return ords[curRead];
+  }
+
+  @Override
+  public void close() {
+  }
+}
+

Added: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/HeapSliceWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/HeapSliceWriter.java?rev=1693682&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/HeapSliceWriter.java (added)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/HeapSliceWriter.java Sat Aug  1 07:48:53 2015
@@ -0,0 +1,60 @@
+package org.apache.lucene.rangetree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+final class HeapSliceWriter implements SliceWriter {
+  final long[] values;
+  final int[] docIDs;
+  final long[] ords;
+  private int nextWrite;
+
+  public HeapSliceWriter(int count) {
+    values = new long[count];
+    docIDs = new int[count];
+    ords = new long[count];
+  }
+
+  @Override
+  public void append(long value, long ord, int docID) {
+    values[nextWrite] = value;
+    ords[nextWrite] = ord;
+    docIDs[nextWrite] = docID;
+    nextWrite++;
+  }
+
+  @Override
+  public SliceReader getReader(long start) {
+    return new HeapSliceReader(values, ords, docIDs, (int) start, values.length);
+  }
+
+  @Override
+  public void close() {
+    if (nextWrite != values.length) {
+      throw new IllegalStateException("only wrote " + nextWrite + " values, but expected " + values.length);
+    }
+  }
+
+  @Override
+  public void destroy() {
+  }
+
+  @Override
+  public String toString() {
+    return "HeapSliceWriter(count=" + values.length + ")";
+  }
+}

Added: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/NumericRangeTreeQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/NumericRangeTreeQuery.java?rev=1693682&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/NumericRangeTreeQuery.java (added)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/NumericRangeTreeQuery.java Sat Aug  1 07:48:53 2015
@@ -0,0 +1,159 @@
+package org.apache.lucene.rangetree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.document.SortedNumericDocValuesField;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.search.ConstantScoreScorer;
+import org.apache.lucene.search.ConstantScoreWeight;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.util.ToStringUtils;
+
+import java.io.IOException;
+
+/** Finds all previously indexed long values that fall within the specified range.
+ *
+ * <p>The field must be indexed with {@link RangeTreeDocValuesFormat}, and {@link SortedNumericDocValuesField} added per document.
+ *
+ * @lucene.experimental */
+
+public class NumericRangeTreeQuery extends Query {
+  final String field;
+  final Long minValue;
+  final Long maxValue;
+  final boolean minInclusive;
+  final boolean maxInclusive;
+
+  // TODO: sugar for all numeric conversions?
+
+  /** Matches all values in the specified long range. */ 
+  public NumericRangeTreeQuery(String field, Long minValue, boolean minInclusive, Long maxValue, boolean maxInclusive) {
+    this.field = field;
+    this.minInclusive = minInclusive;
+    this.minValue = minValue;
+    this.maxInclusive = maxInclusive;
+    this.maxValue = maxValue;
+  }
+
+  @Override
+  public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
+
+    // I don't use RandomAccessWeight here: it's no good to approximate with "match all docs"; this is an inverted structure and should be
+    // used in the first pass:
+
+    return new ConstantScoreWeight(this) {
+
+      @Override
+      public Scorer scorer(LeafReaderContext context) throws IOException {
+        LeafReader reader = context.reader();
+        SortedNumericDocValues sdv = reader.getSortedNumericDocValues(field);
+        if (sdv == null) {
+          // No docs in this segment had this field
+          return null;
+        }
+
+        if (sdv instanceof RangeTreeSortedNumericDocValues == false) {
+          throw new IllegalStateException("field \"" + field + "\" was not indexed with RangeTreeDocValuesFormat: got: " + sdv);
+        }
+        RangeTreeSortedNumericDocValues treeDV = (RangeTreeSortedNumericDocValues) sdv;
+        RangeTreeReader tree = treeDV.getRangeTreeReader();
+
+        // lower
+        long minBoundIncl = (minValue == null) ? Long.MIN_VALUE : minValue.longValue();
+
+        if (minInclusive == false && minValue != null) {
+          if (minBoundIncl == Long.MAX_VALUE) {
+            return null;
+          }
+          minBoundIncl++;
+        }
+          
+        // upper
+        long maxBoundIncl = (maxValue == null) ? Long.MAX_VALUE : maxValue.longValue();
+        if (maxInclusive == false && maxValue != null) {
+          if (maxBoundIncl == Long.MIN_VALUE) {
+            return null;
+          }
+          maxBoundIncl--;
+        }
+
+        if (maxBoundIncl < minBoundIncl) {
+          return null;
+        }
+
+        DocIdSet result = tree.intersect(minBoundIncl, maxBoundIncl, treeDV.delegate, context.reader().maxDoc());
+
+        final DocIdSetIterator disi = result.iterator();
+
+        return new ConstantScoreScorer(this, score(), disi);
+      }
+    };
+  }
+
+  @Override
+  public int hashCode() {
+    int hash = super.hashCode();
+    if (minValue != null) hash += minValue.hashCode()^0x14fa55fb;
+    if (maxValue != null) hash += maxValue.hashCode()^0x733fa5fe;
+    return hash +
+      (Boolean.valueOf(minInclusive).hashCode()^0x14fa55fb)+
+      (Boolean.valueOf(maxInclusive).hashCode()^0x733fa5fe);
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (super.equals(other)) {
+      final NumericRangeTreeQuery q = (NumericRangeTreeQuery) other;
+      return (
+        (q.minValue == null ? minValue == null : q.minValue.equals(minValue)) &&
+        (q.maxValue == null ? maxValue == null : q.maxValue.equals(maxValue)) &&
+        minInclusive == q.minInclusive &&
+        maxInclusive == q.maxInclusive
+      );
+    }
+
+    return false;
+  }
+
+  @Override
+  public String toString(String field) {
+    final StringBuilder sb = new StringBuilder();
+    sb.append(getClass().getSimpleName());
+    sb.append(':');
+    if (this.field.equals(field) == false) {
+      sb.append("field=");
+      sb.append(this.field);
+      sb.append(':');
+    }
+
+    return sb.append(minInclusive ? '[' : '{')
+      .append((minValue == null) ? "*" : minValue.toString())
+      .append(" TO ")
+      .append((maxValue == null) ? "*" : maxValue.toString())
+      .append(maxInclusive ? ']' : '}')
+      .append(ToStringUtils.boost(getBoost()))
+      .toString();
+  }
+}

Added: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/OfflineSliceReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/OfflineSliceReader.java?rev=1693682&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/OfflineSliceReader.java (added)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/OfflineSliceReader.java Sat Aug  1 07:48:53 2015
@@ -0,0 +1,82 @@
+package org.apache.lucene.rangetree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import org.apache.lucene.store.InputStreamDataInput;
+
+final class OfflineSliceReader implements SliceReader {
+  final InputStreamDataInput in;
+  long countLeft;
+  private long value;
+  private long ord;
+  private int docID;
+
+  OfflineSliceReader(Path tempFile, long start, long count) throws IOException {
+    InputStream fis = Files.newInputStream(tempFile);
+    long seekFP = start * RangeTreeWriter.BYTES_PER_DOC;
+    long skipped = 0;
+    while (skipped < seekFP) {
+      long inc = fis.skip(seekFP - skipped);
+      skipped += inc;
+      if (inc == 0) {
+        throw new RuntimeException("skip returned 0");
+      }
+    }
+    in = new InputStreamDataInput(new BufferedInputStream(fis));
+    this.countLeft = count;
+  }
+
+  @Override
+  public boolean next() throws IOException {
+    if (countLeft == 0) {
+      return false;
+    }
+    countLeft--;
+    value = in.readLong();
+    ord = in.readLong();
+    docID = in.readInt();
+    return true;
+  }
+
+  @Override
+  public long value() {
+    return value;
+  }
+
+  @Override
+  public long ord() {
+    return ord;
+  }
+
+  @Override
+  public int docID() {
+    return docID;
+  }
+
+  @Override
+  public void close() throws IOException {
+    in.close();
+  }
+}
+

Added: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/OfflineSliceWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/OfflineSliceWriter.java?rev=1693682&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/OfflineSliceWriter.java (added)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/OfflineSliceWriter.java Sat Aug  1 07:48:53 2015
@@ -0,0 +1,75 @@
+package org.apache.lucene.rangetree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedOutputStream;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.store.OutputStreamDataOutput;
+import org.apache.lucene.util.IOUtils;
+
+final class OfflineSliceWriter implements SliceWriter {
+
+  final Path tempFile;
+  final byte[] scratchBytes = new byte[RangeTreeWriter.BYTES_PER_DOC];
+  final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);      
+  final OutputStreamDataOutput out;
+  final long count;
+  private long countWritten;
+
+  public OfflineSliceWriter(Path tempDir, long count) throws IOException {
+    tempFile = Files.createTempFile(tempDir, "size" + count + ".", "");
+    out = new OutputStreamDataOutput(new BufferedOutputStream(Files.newOutputStream(tempFile)));
+    this.count = count;
+  }
+    
+  @Override
+  public void append(long value, long ord, int docID) throws IOException {
+    out.writeLong(value);
+    out.writeLong(ord);
+    out.writeInt(docID);
+    countWritten++;
+  }
+
+  @Override
+  public SliceReader getReader(long start) throws IOException {
+    return new OfflineSliceReader(tempFile, start, count-start);
+  }
+
+  @Override
+  public void close() throws IOException {
+    out.close();
+    if (count != countWritten) {
+      throw new IllegalStateException("wrote " + countWritten + " values, but expected " + count);
+    }
+  }
+
+  @Override
+  public void destroy() throws IOException {
+    IOUtils.rm(tempFile);
+  }
+
+  @Override
+  public String toString() {
+    return "OfflineSliceWriter(count=" + count + " tempFile=" + tempFile + ")";
+  }
+}
+

Added: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeDocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeDocValuesConsumer.java?rev=1693682&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeDocValuesConsumer.java (added)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeDocValuesConsumer.java Sat Aug  1 07:48:53 2015
@@ -0,0 +1,148 @@
+package org.apache.lucene.rangetree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+
+class RangeTreeDocValuesConsumer extends DocValuesConsumer implements Closeable {
+  final DocValuesConsumer delegate;
+  final int maxPointsInLeafNode;
+  final int maxPointsSortInHeap;
+  final IndexOutput out;
+  final Map<Integer,Long> fieldIndexFPs = new HashMap<>();
+  final SegmentWriteState state;
+
+  public RangeTreeDocValuesConsumer(DocValuesConsumer delegate, SegmentWriteState state, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
+    RangeTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
+    this.delegate = delegate;
+    this.maxPointsInLeafNode = maxPointsInLeafNode;
+    this.maxPointsSortInHeap = maxPointsSortInHeap;
+    this.state = state;
+    String datFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, RangeTreeDocValuesFormat.DATA_EXTENSION);
+    out = state.directory.createOutput(datFileName, state.context);
+    CodecUtil.writeIndexHeader(out, RangeTreeDocValuesFormat.DATA_CODEC_NAME, RangeTreeDocValuesFormat.DATA_VERSION_CURRENT,
+                               state.segmentInfo.getId(), state.segmentSuffix);
+  }
+
+  @Override
+  public void close() throws IOException {
+    boolean success = false;
+    try {
+      CodecUtil.writeFooter(out);
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(delegate, out);
+      } else {
+        IOUtils.closeWhileHandlingException(delegate, out);
+      }
+    }
+    
+    String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, RangeTreeDocValuesFormat.META_EXTENSION);
+    IndexOutput metaOut = state.directory.createOutput(metaFileName, state.context);
+    success = false;
+    try {
+      CodecUtil.writeIndexHeader(metaOut, RangeTreeDocValuesFormat.META_CODEC_NAME, RangeTreeDocValuesFormat.META_VERSION_CURRENT,
+                                 state.segmentInfo.getId(), state.segmentSuffix);
+      metaOut.writeVInt(fieldIndexFPs.size());
+      for(Map.Entry<Integer,Long> ent : fieldIndexFPs.entrySet()) {       
+        metaOut.writeVInt(ent.getKey());
+        metaOut.writeVLong(ent.getValue());
+      }
+      CodecUtil.writeFooter(metaOut);
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(metaOut);
+      } else {
+        IOUtils.closeWhileHandlingException(metaOut);
+      }
+    }
+  }
+
+  @Override
+  public void addSortedNumericField(FieldInfo field, Iterable<Number> docToValueCount, Iterable<Number> values) throws IOException {
+    delegate.addSortedNumericField(field, docToValueCount, values);
+    RangeTreeWriter writer = new RangeTreeWriter(maxPointsInLeafNode, maxPointsSortInHeap);
+    Iterator<Number> valueIt = values.iterator();
+    Iterator<Number> valueCountIt = docToValueCount.iterator();
+    //System.out.println("\nSNF: field=" + field.name);
+    for (int docID=0;docID<state.segmentInfo.maxDoc();docID++) {
+      assert valueCountIt.hasNext();
+      int count = valueCountIt.next().intValue();
+      for(int i=0;i<count;i++) {
+        assert valueIt.hasNext();
+        writer.add(valueIt.next().longValue(), docID);
+      }
+    }
+
+    long indexStartFP = writer.finish(out);
+
+    fieldIndexFPs.put(field.number, indexStartFP);
+  }
+
+  @Override
+  public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
+    throw new UnsupportedOperationException("use either SortedNumericDocValuesField or SortedSetDocValuesField");
+  }
+
+  @Override
+  public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) {
+    throw new UnsupportedOperationException("use either SortedNumericDocValuesField or SortedSetDocValuesField");
+  }
+
+  @Override
+  public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) {
+    throw new UnsupportedOperationException("use either SortedNumericDocValuesField or SortedSetDocValuesField");
+  }
+
+  @Override
+  public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
+    delegate.addSortedSetField(field, values, docToOrdCount, ords);
+    RangeTreeWriter writer = new RangeTreeWriter(maxPointsInLeafNode, maxPointsSortInHeap);
+    Iterator<Number> docToOrdCountIt = docToOrdCount.iterator();
+    Iterator<Number> ordsIt = ords.iterator();
+    //System.out.println("\nSSF: field=" + field.name);
+    for (int docID=0;docID<state.segmentInfo.maxDoc();docID++) {
+      assert docToOrdCountIt.hasNext();
+      int count = docToOrdCountIt.next().intValue();
+      for(int i=0;i<count;i++) {
+        assert ordsIt.hasNext();
+        long ord = ordsIt.next().longValue();
+        writer.add(ord, docID);
+      }
+    }
+
+    long indexStartFP = writer.finish(out);
+
+    fieldIndexFPs.put(field.number, indexStartFP);
+  }
+}

Added: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeDocValuesFormat.java?rev=1693682&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeDocValuesFormat.java (added)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeDocValuesFormat.java Sat Aug  1 07:48:53 2015
@@ -0,0 +1,112 @@
+package org.apache.lucene.rangetree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.codecs.lucene50.Lucene50DocValuesFormat;
+import org.apache.lucene.document.SortedNumericDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField; // javadocs
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+
+import java.io.IOException;
+
+/**
+ * A {@link DocValuesFormat} to efficiently index numeric values from
+ * from {@link SortedNumericDocValuesField} or BytesRef values from {@link SortedSetDocValuesField}
+ * for numeric range queries using ({@link NumericRangeTreeQuery}) and arbitrary binary
+ * range queries using {@link SortedSetRangeTreeQuery}.
+ *
+ * <p>This wraps {@link Lucene50DocValuesFormat}, but saves its own numeric tree
+ * structures to disk for fast query-time intersection. See <a
+ * href="https://www.cs.duke.edu/~pankaj/publications/papers/bkd-sstd.pdf">this paper</a>
+ * for details.
+ *
+ * <p>The numeric tree slices up 1D space into smaller and
+ * smaller ranges, until the smallest ranges have approximately
+ * between X/2 and X (X default is 1024) values in them, at which point
+ * such leaf cells are written as a block to disk, while the index tree
+ * structure records how space was sub-divided is loaded into HEAP
+ * at search time.  At search time, the tree is recursed based on whether
+ * each of left or right child overlap with the query range, and once
+ * a leaf block is reached, all documents in that leaf block are collected
+ * if the cell is fully enclosed by the query shape, or filtered and then
+ * collected, if not.
+ *
+ * <p>The index is also quite compact, because docs only appear once in
+ * the tree (no "prefix terms").
+ *
+ * <p>In addition to the files written by {@link Lucene50DocValuesFormat}, this format writes:
+ * <ol>
+ *   <li><tt>.ndd</tt>: numeric tree leaf data and index</li>
+ *   <li><tt>.ndm</tt>: numeric tree metadata</li>
+ * </ol>
+ *
+ * <p>The disk format is experimental and free to change suddenly, and this code likely has new and exciting bugs!
+ *
+ * @lucene.experimental */
+
+public class RangeTreeDocValuesFormat extends DocValuesFormat {
+
+  static final String DATA_CODEC_NAME = "NumericTreeData";
+  static final int DATA_VERSION_START = 0;
+  static final int DATA_VERSION_CURRENT = DATA_VERSION_START;
+  static final String DATA_EXTENSION = "ndd";
+
+  static final String META_CODEC_NAME = "NumericTreeMeta";
+  static final int META_VERSION_START = 0;
+  static final int META_VERSION_CURRENT = META_VERSION_START;
+  static final String META_EXTENSION = "ndm";
+
+  private final int maxPointsInLeafNode;
+  private final int maxPointsSortInHeap;
+  
+  private final DocValuesFormat delegate = new Lucene50DocValuesFormat();
+
+  /** Default constructor */
+  public RangeTreeDocValuesFormat() {
+    this(RangeTreeWriter.DEFAULT_MAX_VALUES_IN_LEAF_NODE, RangeTreeWriter.DEFAULT_MAX_VALUES_SORT_IN_HEAP);
+  }
+
+  /** Creates this with custom configuration.
+   *
+   * @param maxPointsInLeafNode Maximum number of points in each leaf cell.  Smaller values create a deeper tree with larger in-heap index and possibly
+   *    faster searching.  The default is 1024.
+   * @param maxPointsSortInHeap Maximum number of points where in-heap sort can be used.  When the number of points exceeds this, a (slower)
+   *    offline sort is used.  The default is 128 * 1024.
+   *
+   * @lucene.experimental */
+  public RangeTreeDocValuesFormat(int maxPointsInLeafNode, int maxPointsSortInHeap) {
+    super("NumericTree");
+    RangeTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
+    this.maxPointsInLeafNode = maxPointsInLeafNode;
+    this.maxPointsSortInHeap = maxPointsSortInHeap;
+  }
+
+  @Override
+  public DocValuesConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {
+    return new RangeTreeDocValuesConsumer(delegate.fieldsConsumer(state), state, maxPointsInLeafNode, maxPointsSortInHeap);
+  }
+
+  @Override
+  public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
+    return new RangeTreeDocValuesProducer(delegate.fieldsProducer(state), state);
+  }
+}

Added: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeDocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeDocValuesProducer.java?rev=1693682&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeDocValuesProducer.java (added)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeDocValuesProducer.java Sat Aug  1 07:48:53 2015
@@ -0,0 +1,190 @@
+package org.apache.lucene.rangetree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.Accountables;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.RamUsageEstimator;
+
+class RangeTreeDocValuesProducer extends DocValuesProducer {
+
+  private final Map<String,RangeTreeReader> treeReaders = new HashMap<>();
+  private final Map<Integer,Long> fieldToIndexFPs = new HashMap<>();
+
+  private final IndexInput datIn;
+  private final AtomicLong ramBytesUsed;
+  private final int maxDoc;
+  private final DocValuesProducer delegate;
+  private final boolean merging;
+
+  public RangeTreeDocValuesProducer(DocValuesProducer delegate, SegmentReadState state) throws IOException {
+    String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, RangeTreeDocValuesFormat.META_EXTENSION);
+    ChecksumIndexInput metaIn = state.directory.openChecksumInput(metaFileName, state.context);
+    CodecUtil.checkIndexHeader(metaIn, RangeTreeDocValuesFormat.META_CODEC_NAME, RangeTreeDocValuesFormat.META_VERSION_START, RangeTreeDocValuesFormat.META_VERSION_CURRENT,
+                               state.segmentInfo.getId(), state.segmentSuffix);
+    int fieldCount = metaIn.readVInt();
+    for(int i=0;i<fieldCount;i++) {
+      int fieldNumber = metaIn.readVInt();
+      long indexFP = metaIn.readVLong();
+      fieldToIndexFPs.put(fieldNumber, indexFP);
+    }
+    CodecUtil.checkFooter(metaIn);
+    metaIn.close();
+
+    String datFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, RangeTreeDocValuesFormat.DATA_EXTENSION);
+    datIn = state.directory.openInput(datFileName, state.context);
+    CodecUtil.checkIndexHeader(datIn, RangeTreeDocValuesFormat.DATA_CODEC_NAME, RangeTreeDocValuesFormat.DATA_VERSION_START, RangeTreeDocValuesFormat.DATA_VERSION_CURRENT,
+                               state.segmentInfo.getId(), state.segmentSuffix);
+    ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
+    maxDoc = state.segmentInfo.maxDoc();
+    this.delegate = delegate;
+    merging = false;
+  }
+
+  // clone for merge: we don't hang onto the NumericTrees we load
+  RangeTreeDocValuesProducer(RangeTreeDocValuesProducer orig) throws IOException {
+    assert Thread.holdsLock(orig);
+    datIn = orig.datIn.clone();
+    ramBytesUsed = new AtomicLong(orig.ramBytesUsed.get());
+    delegate = orig.delegate.getMergeInstance();
+    fieldToIndexFPs.putAll(orig.fieldToIndexFPs);
+    treeReaders.putAll(orig.treeReaders);
+    merging = true;
+    maxDoc = orig.maxDoc;
+  }
+
+  @Override
+  public synchronized SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
+    RangeTreeReader treeReader = treeReaders.get(field.name);
+    if (treeReader == null) {
+      // Lazy load
+      Long fp = fieldToIndexFPs.get(field.number);
+      // FieldInfos checks has already ensured we are a DV field of this type, and Codec ensures
+      // this DVFormat was used at write time:
+      assert fp != null;
+      datIn.seek(fp);
+      treeReader = new RangeTreeReader(datIn);
+
+      // Only hang onto the reader when we are not merging:
+      if (merging == false) {
+        treeReaders.put(field.name, treeReader);
+        ramBytesUsed.addAndGet(treeReader.ramBytesUsed());
+      }
+    }
+
+    return new RangeTreeSortedNumericDocValues(treeReader, delegate.getSortedNumeric(field));
+  }
+
+  @Override
+  public void close() throws IOException {
+    IOUtils.close(datIn, delegate);
+  }
+
+  @Override
+  public void checkIntegrity() throws IOException {
+    CodecUtil.checksumEntireFile(datIn);
+  }
+
+  @Override
+  public NumericDocValues getNumeric(FieldInfo field) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public BinaryDocValues getBinary(FieldInfo field) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public SortedDocValues getSorted(FieldInfo field) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public synchronized SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
+    RangeTreeReader treeReader = treeReaders.get(field.name);
+    if (treeReader == null) {
+      // Lazy load
+      Long fp = fieldToIndexFPs.get(field.number);
+
+      // FieldInfos checks has already ensured we are a DV field of this type, and Codec ensures
+      // this DVFormat was used at write time:
+      assert fp != null;
+
+      datIn.seek(fp);
+      //System.out.println("load field=" + field.name);
+      treeReader = new RangeTreeReader(datIn);
+
+      // Only hang onto the reader when we are not merging:
+      if (merging == false) {
+        treeReaders.put(field.name, treeReader);
+        ramBytesUsed.addAndGet(treeReader.ramBytesUsed());
+      }
+    }
+
+    return new RangeTreeSortedSetDocValues(treeReader, delegate.getSortedSet(field));
+  }
+
+  @Override
+  public Bits getDocsWithField(FieldInfo field) throws IOException {
+    return delegate.getDocsWithField(field);
+  }
+
+  @Override
+  public synchronized Collection<Accountable> getChildResources() {
+    List<Accountable> resources = new ArrayList<>();
+    for(Map.Entry<String,RangeTreeReader> ent : treeReaders.entrySet()) {
+      resources.add(Accountables.namedAccountable("field " + ent.getKey(), ent.getValue()));
+    }
+    resources.add(Accountables.namedAccountable("delegate", delegate));
+
+    return resources;
+  }
+
+  @Override
+  public synchronized DocValuesProducer getMergeInstance() throws IOException {
+    return new RangeTreeDocValuesProducer(this);
+  }
+
+  @Override
+  public long ramBytesUsed() {
+    return ramBytesUsed.get() + delegate.ramBytesUsed();
+  }
+}

Added: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeReader.java?rev=1693682&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeReader.java (added)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeReader.java Sat Aug  1 07:48:53 2015
@@ -0,0 +1,202 @@
+package org.apache.lucene.rangetree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.DocIdSetBuilder;
+import org.apache.lucene.util.RamUsageEstimator;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/** Handles intersection of a range with a numeric tree previously written with {@link RangeTreeWriter}.
+ *
+ * @lucene.experimental */
+
+final class RangeTreeReader implements Accountable {
+  final private long[] blockFPs;
+  final private long[] blockMinValues;
+  final IndexInput in;
+  final long globalMaxValue;
+  final int approxDocsPerBlock;
+
+  public RangeTreeReader(IndexInput in) throws IOException {
+
+    // Read index:
+    int numLeaves = in.readVInt();
+    approxDocsPerBlock = in.readVInt();
+
+    blockMinValues = new long[numLeaves];
+    for(int i=0;i<numLeaves;i++) {
+      blockMinValues[i] = in.readLong();
+    }
+    blockFPs = new long[numLeaves];
+    for(int i=0;i<numLeaves;i++) {
+      blockFPs[i] = in.readVLong();
+    }
+    globalMaxValue = in.readLong();
+
+    this.in = in;
+  }
+
+  public long getMinValue() {
+    return blockMinValues[0];
+  }
+
+  public long getMaxValue() {
+    return globalMaxValue;
+  }
+
+  private static final class QueryState {
+    final IndexInput in;
+    final DocIdSetBuilder docs;
+    final long minValueIncl;
+    final long maxValueIncl;
+    final SortedNumericDocValues sndv;
+
+    public QueryState(IndexInput in, int maxDoc,
+                      long minValueIncl, long maxValueIncl,
+                      SortedNumericDocValues sndv) {
+      this.in = in;
+      this.docs = new DocIdSetBuilder(maxDoc);
+      this.minValueIncl = minValueIncl;
+      this.maxValueIncl = maxValueIncl;
+      this.sndv = sndv;
+    }
+  }
+
+  public DocIdSet intersect(long minIncl, long maxIncl, SortedNumericDocValues sndv, int maxDoc) throws IOException {
+
+    if (minIncl > maxIncl) {
+      return DocIdSet.EMPTY;
+    }
+
+    if (minIncl > globalMaxValue || maxIncl < blockMinValues[0]) {
+      return DocIdSet.EMPTY;
+    }
+
+    QueryState state = new QueryState(in.clone(), maxDoc,
+                                      minIncl, maxIncl,
+                                      sndv);
+
+    int startBlockIncl = Arrays.binarySearch(blockMinValues, minIncl);
+    if (startBlockIncl >= 0) {
+      // There can be dups here, when the same value is added many
+      // times.  Also, we need the first block whose min is < minIncl:
+      while (startBlockIncl > 0 && blockMinValues[startBlockIncl] == minIncl) {
+        startBlockIncl--;
+      }
+    } else {
+      startBlockIncl = Math.max(-startBlockIncl-2, 0);
+    }
+
+    int endBlockIncl = Arrays.binarySearch(blockMinValues, maxIncl);
+    if (endBlockIncl >= 0) {
+      // There can be dups here, when the same value is added many
+      // times.  Also, we need the first block whose max is > minIncl:
+      while (endBlockIncl < blockMinValues.length-1 && blockMinValues[endBlockIncl] == maxIncl) {
+        endBlockIncl++;
+      }
+    } else {
+      endBlockIncl = Math.max(-endBlockIncl-2, 0);
+    }
+
+    assert startBlockIncl <= endBlockIncl;
+
+    state.in.seek(blockFPs[startBlockIncl]);
+
+    //System.out.println("startBlockIncl=" + startBlockIncl + " endBlockIncl=" + endBlockIncl);
+
+    // Rough estimate of how many hits we'll see.  Note that in the degenerate case
+    // (index same value many times) this could be a big over-estimate, but in the typical
+    // case it's good:
+    state.docs.grow(approxDocsPerBlock * (endBlockIncl - startBlockIncl + 1));
+
+    int hitCount = 0;
+    for (int block=startBlockIncl;block<=endBlockIncl;block++) {
+      boolean doFilter = blockMinValues[block] <= minIncl || block == blockMinValues.length-1 || blockMinValues[block+1] >= maxIncl;
+      //System.out.println("  block=" + block + " min=" + blockMinValues[block] + " doFilter=" + doFilter);
+
+      int newCount;
+      if (doFilter) {
+        // We must filter each hit:
+        newCount = addSome(state);
+      } else {
+        newCount = addAll(state);
+      }
+
+      hitCount += newCount;
+    }
+
+    // NOTE: hitCount is an over-estimate in the multi-valued case:
+    return state.docs.build(hitCount);
+  }
+
+  /** Adds all docs from the current block. */
+  private int addAll(QueryState state) throws IOException {
+    // How many values are stored in this leaf cell:
+    int count = state.in.readVInt();
+    state.docs.grow(count);
+    for(int i=0;i<count;i++) {
+      int docID = state.in.readInt();
+      state.docs.add(docID);
+    }
+
+    return count;
+  }
+
+  /** Adds docs from the current block, filtering each hit against the query min/max.  This
+   *  is only needed on the boundary blocks. */
+  private int addSome(QueryState state) throws IOException {
+    int hitCount = 0;
+
+    // How many points are stored in this leaf cell:
+    int count = state.in.readVInt();
+    state.docs.grow(count);
+    for(int i=0;i<count;i++) {
+      int docID = state.in.readInt();
+      state.sndv.setDocument(docID);
+
+      // How many values this doc has:
+      int docValueCount = state.sndv.count();
+
+      for(int j=0;j<docValueCount;j++) {
+        long value = state.sndv.valueAt(j);
+
+        if (value >= state.minValueIncl && value <= state.maxValueIncl) {
+          state.docs.add(docID);
+          hitCount++;
+
+          // Stop processing values for this doc:
+          break;
+        }
+      }
+    }
+
+    return hitCount;
+  }
+
+  @Override
+  public long ramBytesUsed() {
+    return blockMinValues.length * RamUsageEstimator.NUM_BYTES_LONG + 
+      blockFPs.length * RamUsageEstimator.NUM_BYTES_LONG;
+  }
+}

Added: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeSortedNumericDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeSortedNumericDocValues.java?rev=1693682&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeSortedNumericDocValues.java (added)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeSortedNumericDocValues.java Sat Aug  1 07:48:53 2015
@@ -0,0 +1,49 @@
+package org.apache.lucene.rangetree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.SortedNumericDocValues;
+
+class RangeTreeSortedNumericDocValues extends SortedNumericDocValues {
+  final RangeTreeReader rangeTreeReader;
+  final SortedNumericDocValues delegate;
+
+  public RangeTreeSortedNumericDocValues(RangeTreeReader rangeTreeReader, SortedNumericDocValues delegate) {
+    this.rangeTreeReader = rangeTreeReader;
+    this.delegate = delegate;
+  }
+
+  public RangeTreeReader getRangeTreeReader() {
+    return rangeTreeReader;
+  }
+
+  @Override
+  public void setDocument(int doc) {
+    delegate.setDocument(doc);
+  }
+
+  @Override
+  public long valueAt(int index) {
+    return delegate.valueAt(index);
+  }
+
+  @Override
+  public int count() {
+    return delegate.count();
+  }
+}

Added: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeSortedSetDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeSortedSetDocValues.java?rev=1693682&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeSortedSetDocValues.java (added)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeSortedSetDocValues.java Sat Aug  1 07:48:53 2015
@@ -0,0 +1,66 @@
+package org.apache.lucene.rangetree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.BytesRef;
+
+class RangeTreeSortedSetDocValues extends SortedSetDocValues {
+  final RangeTreeReader rangeTreeReader;
+  final SortedSetDocValues delegate;
+
+  public RangeTreeSortedSetDocValues(RangeTreeReader rangeTreeReader, SortedSetDocValues delegate) {
+    this.rangeTreeReader = rangeTreeReader;
+    this.delegate = delegate;
+  }
+
+  public RangeTreeReader getRangeTreeReader() {
+    return rangeTreeReader;
+  }
+
+  @Override
+  public long nextOrd() {
+    return delegate.nextOrd();
+  }
+
+  @Override
+  public void setDocument(int doc) {
+    delegate.setDocument(doc);
+  }
+
+  @Override
+  public BytesRef lookupOrd(long ord) {
+    return delegate.lookupOrd(ord);
+  }
+
+  @Override
+  public long getValueCount() {
+    return delegate.getValueCount();
+  }
+
+  @Override
+  public long lookupTerm(BytesRef key) {
+    return delegate.lookupTerm(key);
+  }
+
+  @Override
+  public TermsEnum termsEnum() {
+    return delegate.termsEnum();
+  }
+}

Added: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeWriter.java?rev=1693682&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeWriter.java (added)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/RangeTreeWriter.java Sat Aug  1 07:48:53 2015
@@ -0,0 +1,591 @@
+package org.apache.lucene.rangetree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.nio.file.DirectoryStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.Comparator;
+
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.InPlaceMergeSorter;
+import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
+import org.apache.lucene.util.OfflineSorter;
+import org.apache.lucene.util.RamUsageEstimator;
+
+// TODO
+//   - could we just "use postings" to map leaf -> docIDs?
+//   - we could also index "auto-prefix terms" here, and use better compression
+//   - the index could be efficiently encoded as an FST, so we don't have wasteful
+//     (monotonic) long[] leafBlockFPs; or we could use MonotonicLongValues ... but then
+//     the index is already plenty small: 60M OSM points --> 1.1 MB with 128 points
+//     per leaf, and you can reduce that by putting more points per leaf
+//   - we can quantize the split values to 2 bytes (short): http://people.csail.mit.edu/tmertens/papers/qkdtree.pdf
+
+/** Recursively builds a 1d BKD tree to assign all incoming {@code long} values to smaller
+ *  and smaller ranges until the number of points in a given
+ *  range is &lt= the <code>maxPointsInLeafNode</code>.  The tree is
+ *  fully balanced, which means the leaf nodes will have between 50% and 100% of
+ *  the requested <code>maxPointsInLeafNode</code>, except for the adversarial case
+ *  of indexing exactly the same value many times.
+ *
+ *  <p>
+ *  See <a href="https://www.cs.duke.edu/~pankaj/publications/papers/bkd-sstd.pdf">this paper</a> for details.
+ *
+ *  <p>This consumes heap during writing: for any nodes with fewer than <code>maxPointsSortInHeap</code>, it holds
+ *  the points in memory as simple java arrays.
+ *
+ *  <p>
+ *  <b>NOTE</b>: This can write at most Integer.MAX_VALUE * <code>maxPointsInLeafNode</code> total values,
+ *  which should be plenty since a Lucene index can have at most Integer.MAX_VALUE-1 documents.
+ *
+ * @lucene.experimental */
+
+class RangeTreeWriter {
+
+  // value (long) + ord (long) + docID (int)
+  static final int BYTES_PER_DOC = 2 * RamUsageEstimator.NUM_BYTES_LONG + RamUsageEstimator.NUM_BYTES_INT;
+
+  public static final int DEFAULT_MAX_VALUES_IN_LEAF_NODE = 1024;
+
+  /** This works out to max of ~10 MB peak heap tied up during writing: */
+  public static final int DEFAULT_MAX_VALUES_SORT_IN_HEAP = 128*1024;;
+
+  private final byte[] scratchBytes = new byte[BYTES_PER_DOC];
+  private final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
+
+  private OfflineSorter.ByteSequencesWriter writer;
+  private GrowingHeapSliceWriter heapWriter;
+
+  private Path tempInput;
+  private Path tempDir;
+  private final int maxValuesInLeafNode;
+  private final int maxValuesSortInHeap;
+
+  private long valueCount;
+  private long globalMinValue = Long.MAX_VALUE;
+  private long globalMaxValue = Long.MIN_VALUE;
+
+  public RangeTreeWriter() throws IOException {
+    this(DEFAULT_MAX_VALUES_IN_LEAF_NODE, DEFAULT_MAX_VALUES_SORT_IN_HEAP);
+  }
+
+  // TODO: instead of maxValuesSortInHeap, change to maxMBHeap ... the mapping is non-obvious:
+  public RangeTreeWriter(int maxValuesInLeafNode, int maxValuesSortInHeap) throws IOException {
+    verifyParams(maxValuesInLeafNode, maxValuesSortInHeap);
+    this.maxValuesInLeafNode = maxValuesInLeafNode;
+    this.maxValuesSortInHeap = maxValuesSortInHeap;
+
+    // We write first maxValuesSortInHeap in heap, then cutover to offline for additional points:
+    heapWriter = new GrowingHeapSliceWriter(maxValuesSortInHeap);
+  }
+
+  public static void verifyParams(int maxValuesInLeafNode, int maxValuesSortInHeap) {
+    if (maxValuesInLeafNode <= 0) {
+      throw new IllegalArgumentException("maxValuesInLeafNode must be > 0; got " + maxValuesInLeafNode);
+    }
+    if (maxValuesInLeafNode > ArrayUtil.MAX_ARRAY_LENGTH) {
+      throw new IllegalArgumentException("maxValuesInLeafNode must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxValuesInLeafNode);
+    }
+    if (maxValuesSortInHeap < maxValuesInLeafNode) {
+      throw new IllegalArgumentException("maxValuesSortInHeap must be >= maxValuesInLeafNode; got " + maxValuesSortInHeap + " vs maxValuesInLeafNode="+ maxValuesInLeafNode);
+    }
+    if (maxValuesSortInHeap > ArrayUtil.MAX_ARRAY_LENGTH) {
+      throw new IllegalArgumentException("maxValuesSortInHeap must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxValuesSortInHeap);
+    }
+  }
+
+  /** If the current segment has too many points then we switchover to temp files / offline sort. */
+  private void switchToOffline() throws IOException {
+
+    // OfflineSorter isn't thread safe, but our own private tempDir works around this:
+    tempDir = Files.createTempDirectory(OfflineSorter.defaultTempDir(), RangeTreeWriter.class.getSimpleName());
+
+    // For each .add we just append to this input file, then in .finish we sort this input and resursively build the tree:
+    tempInput = tempDir.resolve("in");
+    writer = new OfflineSorter.ByteSequencesWriter(tempInput);
+    for(int i=0;i<valueCount;i++) {
+      scratchBytesOutput.reset(scratchBytes);
+      scratchBytesOutput.writeLong(heapWriter.values[i]);
+      scratchBytesOutput.writeVInt(heapWriter.docIDs[i]);
+      scratchBytesOutput.writeVLong(i);
+      // TODO: can/should OfflineSorter optimize the fixed-width case?
+      writer.write(scratchBytes, 0, scratchBytes.length);
+    }
+
+    heapWriter = null;
+  }
+
+  void add(long value, int docID) throws IOException {
+    if (valueCount >= maxValuesSortInHeap) {
+      if (writer == null) {
+        switchToOffline();
+      }
+      scratchBytesOutput.reset(scratchBytes);
+      scratchBytesOutput.writeLong(value);
+      scratchBytesOutput.writeVInt(docID);
+      scratchBytesOutput.writeVLong(valueCount);
+      writer.write(scratchBytes, 0, scratchBytes.length);
+    } else {
+      // Not too many points added yet, continue using heap:
+      heapWriter.append(value, valueCount, docID);
+    }
+
+    valueCount++;
+    globalMaxValue = Math.max(value, globalMaxValue);
+    globalMinValue = Math.min(value, globalMinValue);
+  }
+
+  /** Changes incoming {@link ByteSequencesWriter} file to to fixed-width-per-entry file, because we need to be able to slice
+   *  as we recurse in {@link #build}. */
+  private SliceWriter convertToFixedWidth(Path in) throws IOException {
+    BytesRefBuilder scratch = new BytesRefBuilder();
+    scratch.grow(BYTES_PER_DOC);
+    BytesRef bytes = scratch.get();
+    ByteArrayDataInput dataReader = new ByteArrayDataInput();
+
+    OfflineSorter.ByteSequencesReader reader = null;
+    SliceWriter sortedWriter = null;
+    boolean success = false;
+    try {
+      reader = new OfflineSorter.ByteSequencesReader(in);
+      sortedWriter = getWriter(valueCount);
+      for (long i=0;i<valueCount;i++) {
+        boolean result = reader.read(scratch);
+        assert result;
+        dataReader.reset(bytes.bytes, bytes.offset, bytes.length);
+        long value = dataReader.readLong();
+        int docID = dataReader.readVInt();
+        assert docID >= 0: "docID=" + docID;
+        long ord = dataReader.readVLong();
+        sortedWriter.append(value, ord, docID);
+      }
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(sortedWriter, reader);
+      } else {
+        IOUtils.closeWhileHandlingException(reader);
+        try {
+          sortedWriter.destroy();
+        } catch (Throwable t) {
+          // Suppress to keep throwing original exc
+        }
+      }
+    }
+
+    return sortedWriter;
+  }
+
+  private SliceWriter sort() throws IOException {
+    if (heapWriter != null) {
+
+      assert valueCount < Integer.MAX_VALUE;
+
+      // All buffered points are still in heap
+      new InPlaceMergeSorter() {
+        @Override
+        protected void swap(int i, int j) {
+          int docID = heapWriter.docIDs[i];
+          heapWriter.docIDs[i] = heapWriter.docIDs[j];
+          heapWriter.docIDs[j] = docID;
+
+          long ord = heapWriter.ords[i];
+          heapWriter.ords[i] = heapWriter.ords[j];
+          heapWriter.ords[j] = ord;
+
+          long value = heapWriter.values[i];
+          heapWriter.values[i] = heapWriter.values[j];
+          heapWriter.values[j] = value;
+        }
+
+        @Override
+        protected int compare(int i, int j) {
+          int cmp = Long.compare(heapWriter.values[i], heapWriter.values[j]);
+          if (cmp != 0) {
+            return cmp;
+          }
+
+          // Tie-break
+          cmp = Integer.compare(heapWriter.docIDs[i], heapWriter.docIDs[j]);
+          if (cmp != 0) {
+            return cmp;
+          }
+
+          return Long.compare(heapWriter.ords[i], heapWriter.ords[j]);
+        }
+      }.sort(0, (int) valueCount);
+
+      HeapSliceWriter sorted = new HeapSliceWriter((int) valueCount);
+      for(int i=0;i<valueCount;i++) {
+        sorted.append(heapWriter.values[i],
+                      heapWriter.ords[i],
+                      heapWriter.docIDs[i]);
+      }
+
+      return sorted;
+    } else {
+
+      // Offline sort:
+      assert tempDir != null;
+
+      final ByteArrayDataInput reader = new ByteArrayDataInput();
+      Comparator<BytesRef> cmp = new Comparator<BytesRef>() {
+        private final ByteArrayDataInput readerB = new ByteArrayDataInput();
+
+        @Override
+        public int compare(BytesRef a, BytesRef b) {
+          reader.reset(a.bytes, a.offset, a.length);
+          final long valueA = reader.readLong();
+          final int docIDA = reader.readVInt();
+          final long ordA = reader.readVLong();
+
+          reader.reset(b.bytes, b.offset, b.length);
+          final long valueB = reader.readLong();
+          final int docIDB = reader.readVInt();
+          final long ordB = reader.readVLong();
+
+          int cmp = Long.compare(valueA, valueB);
+          if (cmp != 0) {
+            return cmp;
+          }
+
+          // Tie-break
+          cmp = Integer.compare(docIDA, docIDB);
+          if (cmp != 0) {
+            return cmp;
+          }
+
+          return Long.compare(ordA, ordB);
+        }
+      };
+
+      Path sorted = tempDir.resolve("sorted");
+      boolean success = false;
+      try {
+        OfflineSorter sorter = new OfflineSorter(cmp, OfflineSorter.BufferSize.automatic(), tempDir, OfflineSorter.MAX_TEMPFILES);
+        sorter.sort(tempInput, sorted);
+        SliceWriter writer = convertToFixedWidth(sorted);
+        success = true;
+        return writer;
+      } finally {
+        if (success) {
+          IOUtils.rm(sorted);
+        } else {
+          IOUtils.deleteFilesIgnoringExceptions(sorted);
+        }
+      }
+    }
+  }
+
+  /** Writes the 1d BKD tree to the provided {@link IndexOutput} and returns the file offset where index was written. */
+  public long finish(IndexOutput out) throws IOException {
+
+    if (writer != null) {
+      writer.close();
+    }
+
+    if (valueCount == 0) {
+      throw new IllegalStateException("at least one value must be indexed");
+    }
+
+    // TODO: we should use in-memory sort here, if number of points is small enough:
+
+    long countPerLeaf = valueCount;
+    long innerNodeCount = 1;
+
+    while (countPerLeaf > maxValuesInLeafNode) {
+      countPerLeaf /= 2;
+      innerNodeCount *= 2;
+    }
+
+    //System.out.println("innerNodeCount=" + innerNodeCount);
+
+    if (1+2*innerNodeCount >= Integer.MAX_VALUE) {
+      throw new IllegalStateException("too many nodes; increase maxValuesInLeafNode (currently " + maxValuesInLeafNode + ") and reindex");
+    }
+
+    innerNodeCount--;
+
+    int numLeaves = (int) (innerNodeCount+1);
+
+    // Indexed by nodeID, but first (root) nodeID is 1
+    long[] blockMinValues = new long[numLeaves];
+
+    // +1 because leaf count is power of 2 (e.g. 8), and innerNodeCount is power of 2 minus 1 (e.g. 7)
+    long[] leafBlockFPs = new long[numLeaves];
+
+    // Make sure the math above "worked":
+    assert valueCount / blockMinValues.length <= maxValuesInLeafNode: "valueCount=" + valueCount + " blockMinValues.length=" + blockMinValues.length + " maxValuesInLeafNode=" + maxValuesInLeafNode;
+    //System.out.println("  avg pointsPerLeaf=" + (valueCount/blockMinValues.length));
+
+    // Sort all docs by value:
+    SliceWriter sortedWriter = null;
+
+    boolean success = false;
+    try {
+      sortedWriter = sort();
+      heapWriter = null;
+
+      build(1, numLeaves,
+            new PathSlice(sortedWriter, 0, valueCount),
+            out,
+            globalMinValue, globalMaxValue,
+            blockMinValues,
+            leafBlockFPs);
+      success = true;
+    } finally {
+      if (success) {
+        sortedWriter.destroy();
+        IOUtils.rm(tempInput);
+      } else {
+        try {
+          sortedWriter.destroy();
+        } catch (Throwable t) {
+          // Suppress to keep throwing original exc
+        }
+        IOUtils.deleteFilesIgnoringExceptions(tempInput);
+      }
+    }
+
+    //System.out.println("Total nodes: " + innerNodeCount);
+
+    // Write index:
+    long indexFP = out.getFilePointer();
+    out.writeVInt(numLeaves);
+    out.writeVInt((int) (valueCount / numLeaves));
+
+    for (int i=0;i<blockMinValues.length;i++) {
+      out.writeLong(blockMinValues[i]);
+    }
+    for (int i=0;i<leafBlockFPs.length;i++) {
+      out.writeVLong(leafBlockFPs[i]);
+    }
+    out.writeLong(globalMaxValue);
+
+    if (tempDir != null) {
+      // If we had to go offline, we should have removed all temp files we wrote:
+      assert directoryIsEmpty(tempDir);
+      IOUtils.rm(tempDir);
+    }
+
+    return indexFP;
+  }
+
+  // Called only from assert
+  private boolean directoryIsEmpty(Path in) {
+    try (DirectoryStream<Path> dir = Files.newDirectoryStream(in)) {
+      for (Path path : dir) {
+        assert false: "dir=" + in + " still has file=" + path;
+        return false;
+      }
+    } catch (IOException ioe) {
+      // Just ignore: we are only called from assert
+    }
+    return true;
+  }
+
+  /** Sliced reference to points in an OfflineSorter.ByteSequencesWriter file. */
+  private static final class PathSlice {
+    final SliceWriter writer;
+    final long start;
+    final long count;
+
+    public PathSlice(SliceWriter writer, long start, long count) {
+      this.writer = writer;
+      this.start = start;
+      this.count = count;
+    }
+
+    @Override
+    public String toString() {
+      return "PathSlice(start=" + start + " count=" + count + " writer=" + writer + ")";
+    }
+  }
+
+  private long getSplitValue(PathSlice source, long leftCount, long minValue, long maxValue) throws IOException {
+
+    // Read the split value:
+    SliceReader reader = source.writer.getReader(source.start + leftCount);
+    boolean success = false;
+    long splitValue;
+    try {
+      boolean result = reader.next();
+      assert result;
+      splitValue = reader.value();
+      assert splitValue >= minValue && splitValue <= maxValue: "splitValue=" + splitValue + " minValue=" + minValue + " maxValue=" + maxValue + " reader=" + reader;
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(reader);
+      } else {
+        IOUtils.closeWhileHandlingException(reader);
+      }
+    }
+
+    return splitValue;
+  }
+
+  /** The incoming PathSlice for the dim we will split is already partitioned/sorted. */
+  private void build(int nodeID, int leafNodeOffset,
+                     PathSlice source,
+                     IndexOutput out,
+                     long minValue, long maxValue,
+                     long[] blockMinValues,
+                     long[] leafBlockFPs) throws IOException {
+
+    long count = source.count;
+
+    if (source.writer instanceof OfflineSliceWriter && count <= maxValuesSortInHeap) {
+      // Cutover to heap:
+      SliceWriter writer = new HeapSliceWriter((int) count);
+      SliceReader reader = source.writer.getReader(source.start);
+      for(int i=0;i<count;i++) {
+        boolean hasNext = reader.next();
+        assert hasNext;
+        writer.append(reader.value(), reader.ord(), reader.docID());
+      }
+      source = new PathSlice(writer, 0, count);
+    }
+
+    // We should never hit dead-end nodes on recursion even in the adversarial cases:
+    assert count > 0;
+
+    if (nodeID >= leafNodeOffset) {
+      // Leaf node: write block
+      assert maxValue >= minValue;
+
+      //System.out.println("\nleaf:\n  lat range: " + ((long) maxLatEnc-minLatEnc));
+      //System.out.println("  lon range: " + ((long) maxLonEnc-minLonEnc));
+
+      // Sort by docID in the leaf so we can .or(DISI) at search time:
+      SliceReader reader = source.writer.getReader(source.start);
+
+      int[] docIDs = new int[(int) count];
+
+      boolean success = false;
+      try {
+        for (int i=0;i<source.count;i++) {
+
+          // NOTE: we discard ord at this point; we only needed it temporarily
+          // during building to uniquely identify each point to properly handle
+          // the multi-valued case (one docID having multiple values):
+
+          // We also discard lat/lon, since at search time, we reside on the
+          // wrapped doc values for this:
+
+          boolean result = reader.next();
+          assert result;
+          docIDs[i] = reader.docID();
+        }
+        success = true;
+      } finally {
+        if (success) {
+          IOUtils.close(reader);
+        } else {
+          IOUtils.closeWhileHandlingException(reader);
+        }
+      }
+
+      // TODO: not clear we need to do this anymore (we used to make a DISI over
+      // the block at search time), but maybe it buys some memory
+      // locality/sequentiality at search time?
+      Arrays.sort(docIDs);
+
+      // Dedup docIDs: for the multi-valued case where more than one value for the doc
+      // wound up in this leaf cell, we only need to store the docID once:
+      int lastDocID = -1;
+      int uniqueCount = 0;
+      for(int i=0;i<docIDs.length;i++) {
+        int docID = docIDs[i];
+        if (docID != lastDocID) {
+          uniqueCount++;
+          lastDocID = docID;
+        }
+      }
+      assert uniqueCount <= count;
+
+      // TODO: in theory we could compute exactly what this fp will be, since we fixed-width (writeInt) encode docID, and up-front we know
+      // how many docIDs are in every leaf since we don't do anything special about multiple splitValue boundary case?
+      long startFP = out.getFilePointer();
+      out.writeVInt(uniqueCount);
+
+      // Save the block file pointer:
+      int blockID = nodeID - leafNodeOffset;
+      leafBlockFPs[blockID] = startFP;
+      //System.out.println("    leafFP=" + startFP);
+
+      blockMinValues[blockID] = minValue;
+
+      lastDocID = -1;
+      for (int i=0;i<docIDs.length;i++) {
+        // Absolute int encode; with "vInt of deltas" encoding, the .kdd size dropped from
+        // 697 MB -> 539 MB, but query time for 225 queries went from 1.65 sec -> 2.64 sec.
+        // I think if we also indexed prefix terms here we could do less costly compression
+        // on those lists:
+        int docID = docIDs[i];
+        if (docID != lastDocID) {
+          out.writeInt(docID);
+          lastDocID = docID;
+        }
+      }
+      //long endFP = out.getFilePointer();
+      //System.out.println("  bytes/doc: " + ((endFP - startFP) / count));
+    } else {
+      // Inner node: sort, partition/recurse
+
+      assert nodeID < blockMinValues.length: "nodeID=" + nodeID + " blockMinValues.length=" + blockMinValues.length;
+
+      assert source.count == count;
+
+      long leftCount = source.count / 2;
+
+      // NOTE: we don't tweak leftCount for the boundary cases, which means at search time if we are looking for exactly splitValue then we
+      // must search both left and right trees:
+      long splitValue = getSplitValue(source, leftCount, minValue, maxValue);
+
+      build(2*nodeID, leafNodeOffset,
+            new PathSlice(source.writer, source.start, leftCount),
+            out,
+            minValue, splitValue,
+            blockMinValues, leafBlockFPs);
+
+      build(2*nodeID+1, leafNodeOffset,
+            new PathSlice(source.writer, source.start+leftCount, count-leftCount),
+            out,
+            splitValue, maxValue,
+            blockMinValues, leafBlockFPs);
+    }
+  }
+
+  SliceWriter getWriter(long count) throws IOException {
+    if (count < maxValuesSortInHeap) {
+      return new HeapSliceWriter((int) count);
+    } else {
+      return new OfflineSliceWriter(tempDir, count);
+    }
+  }
+}

Added: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/SliceReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/SliceReader.java?rev=1693682&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/SliceReader.java (added)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/SliceReader.java Sat Aug  1 07:48:53 2015
@@ -0,0 +1,31 @@
+package org.apache.lucene.rangetree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Closeable;
+import java.io.IOException;
+
+/** Iterates over one slice of the sorted values.  This abstracts away whether
+ *  OfflineSorter or simple arrays in heap are used. */
+interface SliceReader extends Closeable {
+  boolean next() throws IOException;
+  long value();
+  long ord();
+  int docID();
+}
+

Added: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/SliceWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/SliceWriter.java?rev=1693682&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/SliceWriter.java (added)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/SliceWriter.java Sat Aug  1 07:48:53 2015
@@ -0,0 +1,29 @@
+package org.apache.lucene.rangetree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Closeable;
+import java.io.IOException;
+
+/** Abstracts away whether OfflineSorter or simple arrays in heap are used. */
+interface SliceWriter extends Closeable {
+  void append(long value, long ord, int docID) throws IOException;
+  SliceReader getReader(long start) throws IOException;
+  void destroy() throws IOException;
+}
+



Mime
View raw message