lucene-commits mailing list archives

From yo...@apache.org
Subject [5/6] lucene-solr:branch_6x: SOLR-9160: Sync 6x and 7.0 move of UninvertingReader, SlowCompositeReaderWrapper for Solr (LUCENE-7283)
Date Fri, 27 May 2016 16:42:23 GMT
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5525f429/solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java b/solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java
new file mode 100644
index 0000000..4b60dba
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java
@@ -0,0 +1,887 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.uninverting;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.lucene.codecs.PostingsFormat; // javadocs
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PagedBytes;
+import org.apache.lucene.util.StringHelper;
+
+/**
+ * This class enables fast access to multiple term ords for
+ * a specified field across all docIDs.
+ *
+ * Like FieldCache, it uninverts the index and holds a
+ * packed data structure in RAM to enable fast access.
+ * Unlike FieldCache, it can handle multi-valued fields,
+ * and, it does not hold the term bytes in RAM.  Rather, you
+ * must obtain a TermsEnum from the {@link #getOrdTermsEnum}
+ * method, and then seek-by-ord to get the term's bytes.
+ *
+ * While normally term ords are type long, in this API they are
+ * int as the internal representation here cannot address
+ * more than MAX_INT unique terms.  Also, typically this
+ * class is used on fields with relatively few unique terms
+ * vs the number of documents.  In addition, there is an
+ * internal limit (16 MB) on how many bytes each chunk of
+ * documents may consume.  If you trip this limit you'll hit
+ * an IllegalStateException.
+ *
+ * Deleted documents are skipped during uninversion, and if
+ * you look them up you'll get 0 ords.
+ *
+ * The returned per-document ords do not retain their
+ * original order in the document.  Instead they are returned
+ * in sorted (by ord, ie term's BytesRef comparator) order.  They
+ * are also de-dup'd (ie if doc has same term more than once
+ * in this field, you'll only get that ord back once).
+ *
+ * This class
+ * will create its own term index internally, allowing it to
+ * create a wrapped TermsEnum that can handle ord.  The
+ * {@link #getOrdTermsEnum} method then provides this
+ * wrapped enum.
+ *
+ * The RAM consumption of this class can be high!
+ *
+ * @lucene.experimental
+ */
+
+/*
+ * Final form of the un-inverted field:
+ *   Each document points to a list of term numbers that are contained in that document.
+ *
+ *   Term numbers are in sorted order, and are encoded as variable-length deltas from the
+ *   previous term number.  Real term numbers start at 2 since 0 and 1 are reserved.  A
+ *   term number of 0 signals the end of the termNumber list.
+ *
+ *   There is a single int[maxDoc()] which either contains a pointer into a byte[] for
+ *   the termNumber lists, or directly contains the termNumber list if it fits in the 4
+ *   bytes of an integer.  If the first byte in the integer is 1, the next 3 bytes
+ *   are a pointer into a byte[] where the termNumber list starts.
+ *
+ *   There are actually 256 byte arrays, to compensate for the fact that the pointers
+ *   into the byte arrays are only 3 bytes long.  The correct byte array for a document
+ *   is a function of its id.
+ *
+ *   To save space and speed up faceting, any term that matches enough documents will
+ *   not be un-inverted... it will be skipped while building the un-inverted field structure,
+ *   and will use a set intersection method during faceting.
+ *
+ *   To further save memory, the terms (the actual string values) are not all stored in
+ *   memory, but a TermIndex is used to convert term numbers to term values only
+ *   for the terms needed after faceting has completed.  Only every 128th term value
+ *   is stored, along with its corresponding term number, and this is used as an
+ *   index to find the closest term and iterate until the desired number is hit (very
+ *   much like Lucene's own internal term index).
+ *
+ */
+
+public class DocTermOrds implements Accountable {
+
+  // Term ords are shifted by this, internally, to reserve
+  // values 0 (end term) and 1 (index is a pointer into byte array)
+  private final static int TNUM_OFFSET = 2;
+
+  /** Every 128th term is indexed, by default. */
+  public final static int DEFAULT_INDEX_INTERVAL_BITS = 7; // decrease to a low number like 2 for testing
+
+  private int indexIntervalBits;
+  private int indexIntervalMask;
+  private int indexInterval;
+
+  /** Don't uninvert terms that exceed this count. */
+  protected final int maxTermDocFreq;
+
+  /** Field we are uninverting. */
+  protected final String field;
+
+  /** Number of terms in the field. */
+  protected int numTermsInField;
+
+  /** Total number of references to term numbers. */
+  protected long termInstances;
+  private long memsz;
+
+  /** Total time to uninvert the field. */
+  protected int total_time;
+
+  /** Time for phase1 of the uninvert process. */
+  protected int phase1_time;
+
+  /** Holds the per-document ords or a pointer to the ords. */
+  protected int[] index;
+
+  /** Holds term ords for documents. */
+  protected byte[][] tnums = new byte[256][];
+
+  /** Total bytes (sum of term lengths) for all indexed terms.*/
+  protected long sizeOfIndexedStrings;
+
+  /** Holds the indexed (by default every 128th) terms. */
+  protected BytesRef[] indexedTermsArray = new BytesRef[0];
+
+  /** If non-null, only terms matching this prefix were
+   *  indexed. */
+  protected BytesRef prefix;
+
+  /** Ordinal of the first term in the field, or 0 if the
+   *  {@link PostingsFormat} does not implement {@link
+   *  TermsEnum#ord}. */
+  protected int ordBase;
+
+  /** Used while uninverting. */
+  protected PostingsEnum postingsEnum;
+
+  /** If true, check and throw an exception if the field has docValues enabled.
+   * Normally, docValues should be used in preference to DocTermOrds. */
+  protected boolean checkForDocValues = true;
+
+  /** Returns total bytes used. */
+  public long ramBytesUsed() {
+    // can cache the mem size since it shouldn't change
+    if (memsz!=0) return memsz;
+    long sz = 8*8 + 32; // local fields
+    if (index != null) sz += index.length * 4;
+    if (tnums!=null) {
+      for (byte[] arr : tnums)
+        if (arr != null) sz += arr.length;
+    }
+    memsz = sz;
+    return sz;
+  }
+
+  /** Inverts all terms */
+  public DocTermOrds(LeafReader reader, Bits liveDocs, String field) throws IOException {
+    this(reader, liveDocs, field, null, Integer.MAX_VALUE);
+  }
+  
+  // TODO: instead of all these ctors and options, take termsenum!
+
+  /** Inverts only terms starting w/ prefix */
+  public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix) throws IOException {
+    this(reader, liveDocs, field, termPrefix, Integer.MAX_VALUE);
+  }
+
+  /** Inverts only terms starting w/ prefix, and only terms
+   *  whose docFreq (not taking deletions into account) is
+   *  <=  maxTermDocFreq */
+  public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix, int maxTermDocFreq) throws IOException {
+    this(reader, liveDocs, field, termPrefix, maxTermDocFreq, DEFAULT_INDEX_INTERVAL_BITS);
+  }
+
+  /** Inverts only terms starting w/ prefix, and only terms
+   *  whose docFreq (not taking deletions into account) is
+   *  <=  maxTermDocFreq, with a custom indexing interval
+   *  (default is every 128th term). */
+  public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix, int maxTermDocFreq, int indexIntervalBits) throws IOException {
+    this(field, maxTermDocFreq, indexIntervalBits);
+    uninvert(reader, liveDocs, termPrefix);
+  }
+
+  /** Subclass inits w/ this, but be sure you then call
+   *  uninvert, only once */
+  protected DocTermOrds(String field, int maxTermDocFreq, int indexIntervalBits) {
+    //System.out.println("DTO init field=" + field + " maxTDFreq=" + maxTermDocFreq);
+    this.field = field;
+    this.maxTermDocFreq = maxTermDocFreq;
+    this.indexIntervalBits = indexIntervalBits;
+    indexIntervalMask = 0xffffffff >>> (32-indexIntervalBits);
+    indexInterval = 1 << indexIntervalBits;
+  }
+
+  /** 
+   * Returns a TermsEnum that implements ord, or null if no terms in field.
+   * <p>
+   *  we build a "private" terms
+   *  index internally (WARNING: consumes RAM) and use that
+   *  index to implement ord.  This also enables ord on top
+   *  of a composite reader.  The returned TermsEnum is
+   *  unpositioned.  This returns null if there are no terms.
+   * </p>
+   *  <p><b>NOTE</b>: you must pass the same reader that was
+   *  used when creating this class 
+   */
+  public TermsEnum getOrdTermsEnum(LeafReader reader) throws IOException {
+    // NOTE: see LUCENE-6529 before attempting to optimize this method to
+    // return a TermsEnum directly from the reader if it already supports ord().
+
+    assert null != indexedTermsArray;
+    
+    if (0 == indexedTermsArray.length) {
+      return null;
+    } else {
+      return new OrdWrappedTermsEnum(reader);
+    }
+  }
+
+  /**
+   * Returns the number of terms in this field
+   */
+  public int numTerms() {
+    return numTermsInField;
+  }
+
+  /**
+   * Returns {@code true} if no terms were indexed.
+   */
+  public boolean isEmpty() {
+    return index == null;
+  }
+
+  /** Subclass can override this */
+  protected void visitTerm(TermsEnum te, int termNum) throws IOException {
+  }
+
+  /** Invoked during {@link #uninvert(org.apache.lucene.index.LeafReader,Bits,BytesRef)}
+   *  to record the document frequency for each uninverted
+   *  term. */
+  protected void setActualDocFreq(int termNum, int df) throws IOException {
+  }
+
+  /** Call this only once (if you subclass!) */
+  protected void uninvert(final LeafReader reader, Bits liveDocs, final BytesRef termPrefix) throws IOException {
+    final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
+    if (checkForDocValues && info != null && info.getDocValuesType() != DocValuesType.NONE) {
+      throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
+    }
+    //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
+    final long startTime = System.nanoTime();
+    prefix = termPrefix == null ? null : BytesRef.deepCopyOf(termPrefix);
+
+    final int maxDoc = reader.maxDoc();
+    final int[] index = new int[maxDoc];       // immediate term numbers, or the index into the byte[] representing the last number
+    final int[] lastTerm = new int[maxDoc];    // last term we saw for this document
+    final byte[][] bytes = new byte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)
+
+    final Terms terms = reader.terms(field);
+    if (terms == null) {
+      // No terms
+      return;
+    }
+
+    final TermsEnum te = terms.iterator();
+    final BytesRef seekStart = termPrefix != null ? termPrefix : new BytesRef();
+    //System.out.println("seekStart=" + seekStart.utf8ToString());
+    if (te.seekCeil(seekStart) == TermsEnum.SeekStatus.END) {
+      // No terms match
+      return;
+    }
+
+    // For our "term index wrapper"
+    final List<BytesRef> indexedTerms = new ArrayList<>();
+    final PagedBytes indexedTermsBytes = new PagedBytes(15);
+
+    // we need a minimum of 9 bytes, but round up to 12 since the space would
+    // be wasted with most allocators anyway.
+    byte[] tempArr = new byte[12];
+
+    //
+    // enumerate all terms, and build an intermediate form of the un-inverted field.
+    //
+    // During this intermediate form, every document has a (potential) byte[]
+    // and the int[maxDoc()] array either contains the termNumber list directly
+    // or the *end* offset of the termNumber list in its byte array (for faster
+    // appending and faster creation of the final form).
+    //
+    // idea... if things are too large while building, we could do a range of docs
+    // at a time (but it would be a fair amount slower to build)
+    // could also do ranges in parallel to take advantage of multiple CPUs
+
+    // OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
+    // values.  This requires going over the field first to find the most
+    // frequent terms ahead of time.
+
+    int termNum = 0;
+    postingsEnum = null;
+
+    // Loop begins with te positioned to first term (we call
+    // seek above):
+    for (;;) {
+      final BytesRef t = te.term();
+      if (t == null || (termPrefix != null && !StringHelper.startsWith(t, termPrefix))) {
+        break;
+      }
+      //System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);
+
+      visitTerm(te, termNum);
+
+      if ((termNum & indexIntervalMask) == 0) {
+        // Index this term
+        sizeOfIndexedStrings += t.length;
+        BytesRef indexedTerm = new BytesRef();
+        indexedTermsBytes.copy(t, indexedTerm);
+        // TODO: really should 1) strip off useless suffix,
+        // and 2) use FST not array/PagedBytes
+        indexedTerms.add(indexedTerm);
+      }
+
+      final int df = te.docFreq();
+      if (df <= maxTermDocFreq) {
+
+        postingsEnum = te.postings(postingsEnum, PostingsEnum.NONE);
+
+        // dF, but takes deletions into account
+        int actualDF = 0;
+
+        for (;;) {
+          int doc = postingsEnum.nextDoc();
+          if (doc == DocIdSetIterator.NO_MORE_DOCS) {
+            break;
+          }
+          //System.out.println("  chunk=" + chunk + " docs");
+
+          actualDF ++;
+          termInstances++;
+          
+          //System.out.println("    docID=" + doc);
+          // add TNUM_OFFSET to the term number to make room for special reserved values:
+          // 0 (end term) and 1 (index into byte array follows)
+          int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
+          lastTerm[doc] = termNum;
+          int val = index[doc];
+
+          if ((val & 0xff)==1) {
+            // index into byte array (actually the end of
+            // the doc-specific byte[] when building)
+            int pos = val >>> 8;
+            int ilen = vIntSize(delta);
+            byte[] arr = bytes[doc];
+            int newend = pos+ilen;
+            if (newend > arr.length) {
+              // We avoid a doubling strategy to lower memory usage.
+              // this faceting method isn't for docs with many terms.
+              // In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary.
+              // TODO: figure out what array lengths we can round up to w/o actually using more memory
+              // (how much space does a byte[] take up?  Is data preceded by a 32 bit length only?)
+              // It should be safe to round up to the nearest 32 bits in any case.
+              int newLen = (newend + 3) & 0xfffffffc;  // 4 byte alignment
+              byte[] newarr = new byte[newLen];
+              System.arraycopy(arr, 0, newarr, 0, pos);
+              arr = newarr;
+              bytes[doc] = newarr;
+            }
+            pos = writeInt(delta, arr, pos);
+            index[doc] = (pos<<8) | 1;  // update pointer to end index in byte[]
+          } else {
+            // OK, this int has data in it... find the end (a zero starting byte - not
+            // part of another number, hence not following a byte with the high bit set).
+            int ipos;
+            if (val==0) {
+              ipos=0;
+            } else if ((val & 0x0000ff80)==0) {
+              ipos=1;
+            } else if ((val & 0x00ff8000)==0) {
+              ipos=2;
+            } else if ((val & 0xff800000)==0) {
+              ipos=3;
+            } else {
+              ipos=4;
+            }
+
+            //System.out.println("      ipos=" + ipos);
+
+            int endPos = writeInt(delta, tempArr, ipos);
+            //System.out.println("      endpos=" + endPos);
+            if (endPos <= 4) {
+              //System.out.println("      fits!");
+              // value will fit in the integer... move bytes back
+              for (int j=ipos; j<endPos; j++) {
+                val |= (tempArr[j] & 0xff) << (j<<3);
+              }
+              index[doc] = val;
+            } else {
+              // value won't fit... move integer into byte[]
+              for (int j=0; j<ipos; j++) {
+                tempArr[j] = (byte)val;
+                val >>>=8;
+              }
+              // point at the end index in the byte[]
+              index[doc] = (endPos<<8) | 1;
+              bytes[doc] = tempArr;
+              tempArr = new byte[12];
+            }
+          }
+        }
+        setActualDocFreq(termNum, actualDF);
+      }
+
+      termNum++;
+      if (te.next() == null) {
+        break;
+      }
+    }
+
+    numTermsInField = termNum;
+
+    long midPoint = System.nanoTime();
+
+    if (termInstances == 0) {
+      // we didn't invert anything
+      // lower memory consumption.
+      tnums = null;
+    } else {
+
+      this.index = index;
+
+      //
+      // transform intermediate form into the final form, building a single byte[]
+      // at a time, and releasing the intermediate byte[]s as we go to avoid
+      // increasing the memory footprint.
+      //
+
+      for (int pass = 0; pass<256; pass++) {
+        byte[] target = tnums[pass];
+        int pos=0;  // end in target;
+        if (target != null) {
+          pos = target.length;
+        } else {
+          target = new byte[4096];
+        }
+
+        // loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
+        // where pp is the pass (which array we are building), and xx is all values.
+        // each pass shares the same byte[] for termNumber lists.
+        for (int docbase = pass<<16; docbase<maxDoc; docbase+=(1<<24)) {
+          int lim = Math.min(docbase + (1<<16), maxDoc);
+          for (int doc=docbase; doc<lim; doc++) {
+            //System.out.println("  pass=" + pass + " process docID=" + doc);
+            int val = index[doc];
+            if ((val&0xff) == 1) {
+              int len = val >>> 8;
+              //System.out.println("    ptr pos=" + pos);
+              index[doc] = (pos<<8)|1; // change index to point to start of array
+              if ((pos & 0xff000000) != 0) {
+                // we only have 24 bits for the array index
+                throw new IllegalStateException("Too many values for UnInvertedField faceting on field "+field);
+              }
+              byte[] arr = bytes[doc];
+              /*
+              for(byte b : arr) {
+                //System.out.println("      b=" + Integer.toHexString((int) b));
+              }
+              */
+              bytes[doc] = null;        // IMPORTANT: allow GC to avoid OOM
+              if (target.length <= pos + len) {
+                int newlen = target.length;
+                /*** we don't have to worry about the array getting too large
+                 * since the "pos" param will overflow first (only 24 bits available)
+                if ((newlen<<1) <= 0) {
+                  // overflow...
+                  newlen = Integer.MAX_VALUE;
+                  if (newlen <= pos + len) {
+                    throw new SolrException(400,"Too many terms to uninvert field!");
+                  }
+                } else {
+                  while (newlen <= pos + len) newlen<<=1;  // doubling strategy
+                }
+                ****/
+                while (newlen <= pos + len) newlen<<=1;  // doubling strategy                 
+                byte[] newtarget = new byte[newlen];
+                System.arraycopy(target, 0, newtarget, 0, pos);
+                target = newtarget;
+              }
+              System.arraycopy(arr, 0, target, pos, len);
+              pos += len + 1;  // skip single byte at end and leave it 0 for terminator
+            }
+          }
+        }
+
+        // shrink array
+        if (pos < target.length) {
+          byte[] newtarget = new byte[pos];
+          System.arraycopy(target, 0, newtarget, 0, pos);
+          target = newtarget;
+        }
+        
+        tnums[pass] = target;
+
+        if ((pass << 16) > maxDoc)
+          break;
+      }
+
+    }
+    indexedTermsArray = indexedTerms.toArray(new BytesRef[indexedTerms.size()]);
+
+    long endTime = System.nanoTime();
+
+    total_time = (int) TimeUnit.MILLISECONDS.convert(endTime-startTime, TimeUnit.NANOSECONDS);
+    phase1_time = (int) TimeUnit.MILLISECONDS.convert(midPoint-startTime, TimeUnit.NANOSECONDS);
+  }
+
+  /** Number of bytes to represent an unsigned int as a vint. */
+  private static int vIntSize(int x) {
+    if ((x & (0xffffffff << (7*1))) == 0 ) {
+      return 1;
+    }
+    if ((x & (0xffffffff << (7*2))) == 0 ) {
+      return 2;
+    }
+    if ((x & (0xffffffff << (7*3))) == 0 ) {
+      return 3;
+    }
+    if ((x & (0xffffffff << (7*4))) == 0 ) {
+      return 4;
+    }
+    return 5;
+  }
+
+  // todo: if we know the size of the vInt already, we could do
+  // a single switch on the size
+  private static int writeInt(int x, byte[] arr, int pos) {
+    int a;
+    a = (x >>> (7*4));
+    if (a != 0) {
+      arr[pos++] = (byte)(a | 0x80);
+    }
+    a = (x >>> (7*3));
+    if (a != 0) {
+      arr[pos++] = (byte)(a | 0x80);
+    }
+    a = (x >>> (7*2));
+    if (a != 0) {
+      arr[pos++] = (byte)(a | 0x80);
+    }
+    a = (x >>> (7*1));
+    if (a != 0) {
+      arr[pos++] = (byte)(a | 0x80);
+    }
+    arr[pos++] = (byte)(x & 0x7f);
+    return pos;
+  }
+
+  /** 
+   * "wrap" our own terms index around the original IndexReader. 
+   * Only valid if there are terms for this field from the original reader
+   */
+  private final class OrdWrappedTermsEnum extends TermsEnum {
+    private final TermsEnum termsEnum;
+    private BytesRef term;
+    private long ord = -indexInterval-1;          // force "real" seek
+    
+    public OrdWrappedTermsEnum(LeafReader reader) throws IOException {
+      assert indexedTermsArray != null;
+      assert 0 != indexedTermsArray.length;
+      termsEnum = reader.fields().terms(field).iterator();
+    }
+
+    @Override    
+    public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
+      return termsEnum.postings(reuse, flags);
+    }
+
+    @Override
+    public BytesRef term() {
+      return term;
+    }
+
+    @Override
+    public BytesRef next() throws IOException {
+      if (++ord < 0) {
+        ord = 0;
+      }
+      if (termsEnum.next() == null) {
+        term = null;
+        return null;
+      }
+      return setTerm();  // this is extra work if we know we are in bounds...
+    }
+
+    @Override
+    public int docFreq() throws IOException {
+      return termsEnum.docFreq();
+    }
+
+    @Override
+    public long totalTermFreq() throws IOException {
+      return termsEnum.totalTermFreq();
+    }
+
+    @Override
+    public long ord() {
+      return ordBase + ord;
+    }
+
+    @Override
+    public SeekStatus seekCeil(BytesRef target) throws IOException {
+
+      // already here
+      if (term != null && term.equals(target)) {
+        return SeekStatus.FOUND;
+      }
+
+      int startIdx = Arrays.binarySearch(indexedTermsArray, target);
+
+      if (startIdx >= 0) {
+        // we hit the term exactly... lucky us!
+        TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target);
+        assert seekStatus == TermsEnum.SeekStatus.FOUND;
+        ord = startIdx << indexIntervalBits;
+        setTerm();
+        assert term != null;
+        return SeekStatus.FOUND;
+      }
+
+      // we didn't hit the term exactly
+      startIdx = -startIdx-1;
+    
+      if (startIdx == 0) {
+        // our target occurs *before* the first term
+        TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target);
+        assert seekStatus == TermsEnum.SeekStatus.NOT_FOUND;
+        ord = 0;
+        setTerm();
+        assert term != null;
+        return SeekStatus.NOT_FOUND;
+      }
+
+      // back up to the start of the block
+      startIdx--;
+
+      if ((ord >> indexIntervalBits) == startIdx && term != null && term.compareTo(target) <= 0) {
+        // we are already in the right block and the current term is before the term we want,
+        // so we don't need to seek.
+      } else {
+        // seek to the right block
+        TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(indexedTermsArray[startIdx]);
+        assert seekStatus == TermsEnum.SeekStatus.FOUND;
+        ord = startIdx << indexIntervalBits;
+        setTerm();
+        assert term != null;  // should be non-null since it's in the index
+      }
+
+      while (term != null && term.compareTo(target) < 0) {
+        next();
+      }
+
+      if (term == null) {
+        return SeekStatus.END;
+      } else if (term.compareTo(target) == 0) {
+        return SeekStatus.FOUND;
+      } else {
+        return SeekStatus.NOT_FOUND;
+      }
+    }
+
+    @Override
+    public void seekExact(long targetOrd) throws IOException {
+      int delta = (int) (targetOrd - ordBase - ord);
+      //System.out.println("  seek(ord) targetOrd=" + targetOrd + " delta=" + delta + " ord=" + ord + " ii=" + indexInterval);
+      if (delta < 0 || delta > indexInterval) {
+        final int idx = (int) (targetOrd >>> indexIntervalBits);
+        final BytesRef base = indexedTermsArray[idx];
+        //System.out.println("  do seek term=" + base.utf8ToString());
+        ord = idx << indexIntervalBits;
+        delta = (int) (targetOrd - ord);
+        final TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(base);
+        assert seekStatus == TermsEnum.SeekStatus.FOUND;
+      } else {
+        //System.out.println("seek w/in block");
+      }
+
+      while (--delta >= 0) {
+        BytesRef br = termsEnum.next();
+        if (br == null) {
+          assert false;
+          return;
+        }
+        ord++;
+      }
+
+      setTerm();
+      assert term != null;
+    }
+
+    private BytesRef setTerm() throws IOException {
+      term = termsEnum.term();
+      //System.out.println("  setTerm() term=" + term.utf8ToString() + " vs prefix=" + (prefix == null ? "null" : prefix.utf8ToString()));
+      if (prefix != null && !StringHelper.startsWith(term, prefix)) {
+        term = null;
+      }
+      return term;
+    }
+  }
+
+  /** Returns the term ({@link BytesRef}) corresponding to
+   *  the provided ordinal. */
+  public BytesRef lookupTerm(TermsEnum termsEnum, int ord) throws IOException {
+    termsEnum.seekExact(ord);
+    return termsEnum.term();
+  }
+  
+  /** Returns a SortedSetDocValues view of this instance */
+  public SortedSetDocValues iterator(LeafReader reader) throws IOException {
+    if (isEmpty()) {
+      return DocValues.emptySortedSet();
+    } else {
+      return new Iterator(reader);
+    }
+  }
+  
+  private class Iterator extends SortedSetDocValues {
+    final LeafReader reader;
+    final TermsEnum te;  // used internally for lookupOrd() and lookupTerm()
+    // currently we read 5 at a time (using the logic of the old iterator)
+    final int buffer[] = new int[5];
+    int bufferUpto;
+    int bufferLength;
+    
+    private int tnum;
+    private int upto;
+    private byte[] arr;
+    
+    Iterator(LeafReader reader) throws IOException {
+      this.reader = reader;
+      this.te = termsEnum();
+    }
+    
+    @Override
+    public long nextOrd() {
+      while (bufferUpto == bufferLength) {
+        if (bufferLength < buffer.length) {
+          return NO_MORE_ORDS;
+        } else {
+          bufferLength = read(buffer);
+          bufferUpto = 0;
+        }
+      }
+      return buffer[bufferUpto++];
+    }
+    
+    /** Buffer must be at least 5 ints long.  Returns number
+     *  of term ords placed into buffer; if this count is
+     *  less than buffer.length then that is the end. */
+    int read(int[] buffer) {
+      int bufferUpto = 0;
+      if (arr == null) {
+        // code is inlined into upto
+        //System.out.println("inlined");
+        int code = upto;
+        int delta = 0;
+        for (;;) {
+          delta = (delta << 7) | (code & 0x7f);
+          if ((code & 0x80)==0) {
+            if (delta==0) break;
+            tnum += delta - TNUM_OFFSET;
+            buffer[bufferUpto++] = ordBase+tnum;
+            //System.out.println("  tnum=" + tnum);
+            delta = 0;
+          }
+          code >>>= 8;
+        }
+      } else {
+        // code is a pointer
+        for(;;) {
+          int delta = 0;
+          for(;;) {
+            byte b = arr[upto++];
+            delta = (delta << 7) | (b & 0x7f);
+            //System.out.println("    cycle: upto=" + upto + " delta=" + delta + " b=" + b);
+            if ((b & 0x80) == 0) break;
+          }
+          //System.out.println("  delta=" + delta);
+          if (delta == 0) break;
+          tnum += delta - TNUM_OFFSET;
+          //System.out.println("  tnum=" + tnum);
+          buffer[bufferUpto++] = ordBase+tnum;
+          if (bufferUpto == buffer.length) {
+            break;
+          }
+        }
+      }
+
+      return bufferUpto;
+    }
+
+    @Override
+    public void setDocument(int docID) {
+      tnum = 0;
+      final int code = index[docID];
+      if ((code & 0xff)==1) {
+        // a pointer
+        upto = code>>>8;
+        //System.out.println("    pointer!  upto=" + upto);
+        int whichArray = (docID >>> 16) & 0xff;
+        arr = tnums[whichArray];
+      } else {
+        //System.out.println("    inline!");
+        arr = null;
+        upto = code;
+      }
+      bufferUpto = 0;
+      bufferLength = read(buffer);
+    }
+
+    @Override
+    public BytesRef lookupOrd(long ord) {
+      try {
+        return DocTermOrds.this.lookupTerm(te, (int) ord);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    @Override
+    public long getValueCount() {
+      return numTerms();
+    }
+
+    @Override
+    public long lookupTerm(BytesRef key) {
+      try {
+        switch (te.seekCeil(key)) {
+          case FOUND:           
+            assert te.ord() >= 0;
+            return te.ord();
+          case NOT_FOUND:
+            assert te.ord() >= 0;
+            return -te.ord()-1;
+          default: /* END */
+            return -numTerms()-1;
+        }
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+    
+    @Override
+    public TermsEnum termsEnum() {    
+      try {
+        return getOrdTermsEnum(reader);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+  }
+}
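
A minimal usage sketch of the class added above (an editor's illustration, not part of this commit): it assumes an open LeafReader and a multi-valued, indexed string field named "category"; any such field would do.

import java.io.IOException;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.uninverting.DocTermOrds;

class DocTermOrdsExample {
  // Uninvert the "category" field once, then print the de-duplicated,
  // ord-sorted terms of a single document.
  static void printOrds(LeafReader leafReader, int docID) throws IOException {
    DocTermOrds dto = new DocTermOrds(leafReader, leafReader.getLiveDocs(), "category");

    SortedSetDocValues ords = dto.iterator(leafReader);   // per-document ord access
    ords.setDocument(docID);
    for (long ord = ords.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = ords.nextOrd()) {
      BytesRef term = ords.lookupOrd(ord);                // resolve the ord back to its term bytes
      System.out.println(term.utf8ToString());
    }

    // The wrapped enum supports seek-by-ord; it is null if the field has no terms.
    TermsEnum te = dto.getOrdTermsEnum(leafReader);
    if (te != null) {
      te.seekExact(0);
      System.out.println("first term: " + te.term().utf8ToString());
    }
  }
}

Note that the ords returned for a document are already sorted and de-duplicated, and deleted documents simply yield zero ords, as described in the class javadoc.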

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5525f429/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java b/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java
new file mode 100644
index 0000000..7ef4956
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java
@@ -0,0 +1,466 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.uninverting;
+
+import java.io.IOException;
+import java.io.PrintStream;
+
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.IndexReader; // javadocs
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LegacyNumericUtils;
+import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.util.RamUsageEstimator;
+
+/**
+ * Expert: Maintains caches of term values.
+ *
+ * <p>Created: May 19, 2004 11:13:14 AM
+ *
+ * @since   lucene 1.4
+ * @see FieldCacheSanityChecker
+ *
+ * @lucene.internal
+ */
+interface FieldCache {
+
+  /**
+   * Placeholder indicating creation of this cache is currently in-progress.
+   */
+  public static final class CreationPlaceholder implements Accountable {
+    Accountable value;
+
+    @Override
+    public long ramBytesUsed() {
+      // don't call on the in-progress value, might make things angry.
+      return RamUsageEstimator.NUM_BYTES_OBJECT_REF;
+    }
+  }
+
+  /**
+   * Interface to all parsers. It is used to parse different numeric types.
+   */
+  public interface Parser {
+    
+    /**
+     * Pulls a {@link TermsEnum} from the given {@link Terms}. This method allows certain parsers
+     * to filter the actual TermsEnum before the field cache is filled.
+     * 
+     * @param terms the {@link Terms} instance to create the {@link TermsEnum} from.
+     * @return a possibly filtered {@link TermsEnum} instance, this method must not return <code>null</code>.
+     * @throws IOException if an {@link IOException} occurs
+     * @deprecated index with Points instead
+     */
+    @Deprecated
+    public TermsEnum termsEnum(Terms terms) throws IOException;
+    
+    /** Parses this field's value. */
+    public long parseValue(BytesRef term);
+  }
+  
+  /**
+   * Base class for points parsers. These parsers do not use the inverted index, but instead
+   * uninvert point data.
+   * 
+   * This abstraction can be cleaned up when Parser.termsEnum is removed.
+   */
+  public abstract class PointParser implements Parser {
+    public final TermsEnum termsEnum(Terms terms) throws IOException {
+      throw new UnsupportedOperationException("makes no sense for parsing points");
+    }
+  }
+
+  /** Expert: The cache used internally by sorting and range query classes. */
+  public static FieldCache DEFAULT = new FieldCacheImpl();
+
+  /**
+   * A parser instance for int values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
+   * via {@link org.apache.lucene.document.IntPoint}.
+   */
+  public static final Parser INT_POINT_PARSER = new PointParser() {
+    @Override
+    public long parseValue(BytesRef point) {
+      return NumericUtils.sortableBytesToInt(point.bytes, point.offset);
+    }
+    
+    @Override
+    public String toString() { 
+      return FieldCache.class.getName()+".INT_POINT_PARSER"; 
+    }
+  };
+  
+  /**
+   * A parser instance for long values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
+   * via {@link org.apache.lucene.document.LongPoint}.
+   */
+  public static final Parser LONG_POINT_PARSER = new PointParser() {
+    @Override
+    public long parseValue(BytesRef point) {
+      return NumericUtils.sortableBytesToLong(point.bytes, point.offset);
+    }
+    
+    @Override
+    public String toString() { 
+      return FieldCache.class.getName()+".LONG_POINT_PARSER"; 
+    }
+  };
+  
+  /**
+   * A parser instance for float values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
+   * via {@link org.apache.lucene.document.FloatPoint}.
+   */
+  public static final Parser FLOAT_POINT_PARSER = new PointParser() {
+    @Override
+    public long parseValue(BytesRef point) {
+      return NumericUtils.sortableFloatBits(NumericUtils.sortableBytesToInt(point.bytes, point.offset));
+    }
+    
+    @Override
+    public String toString() { 
+      return FieldCache.class.getName()+".FLOAT_POINT_PARSER"; 
+    }
+  };
+  
+  /**
+   * A parser instance for double values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
+   * via {@link org.apache.lucene.document.DoublePoint}.
+   */
+  public static final Parser DOUBLE_POINT_PARSER = new PointParser() {
+    @Override
+    public long parseValue(BytesRef point) {
+      return NumericUtils.sortableDoubleBits(NumericUtils.sortableBytesToLong(point.bytes, point.offset));
+    }
+    
+    @Override
+    public String toString() { 
+      return FieldCache.class.getName()+".DOUBLE_POINT_PARSER"; 
+    }
+  };
+  
+  /**
+   * A parser instance for int values encoded by {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
+   * via {@link org.apache.lucene.document.LegacyIntField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
+   * @deprecated Index with points and use {@link #INT_POINT_PARSER} instead.
+   */
+  @Deprecated
+  public static final Parser LEGACY_INT_PARSER = new Parser() {
+    @Override
+    public long parseValue(BytesRef term) {
+      return LegacyNumericUtils.prefixCodedToInt(term);
+    }
+    
+    @Override
+    public TermsEnum termsEnum(Terms terms) throws IOException {
+      return LegacyNumericUtils.filterPrefixCodedInts(terms.iterator());
+    }
+    
+    @Override
+    public String toString() { 
+      return FieldCache.class.getName()+".LEGACY_INT_PARSER"; 
+    }
+  };
+
+  /**
+   * A parser instance for float values encoded with {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
+   * via {@link org.apache.lucene.document.LegacyFloatField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
+   * @deprecated Index with points and use {@link #FLOAT_POINT_PARSER} instead.
+   */
+  @Deprecated
+  public static final Parser LEGACY_FLOAT_PARSER = new Parser() {
+    @Override
+    public long parseValue(BytesRef term) {
+      int val = LegacyNumericUtils.prefixCodedToInt(term);
+      if (val<0) val ^= 0x7fffffff;
+      return val;
+    }
+    
+    @Override
+    public String toString() { 
+      return FieldCache.class.getName()+".LEGACY_FLOAT_PARSER"; 
+    }
+    
+    @Override
+    public TermsEnum termsEnum(Terms terms) throws IOException {
+      return LegacyNumericUtils.filterPrefixCodedInts(terms.iterator());
+    }
+  };
+
+  /**
+   * A parser instance for long values encoded by {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
+   * via {@link org.apache.lucene.document.LegacyLongField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
+   * @deprecated Index with points and use {@link #LONG_POINT_PARSER} instead.
+   */
+  @Deprecated
+  public static final Parser LEGACY_LONG_PARSER = new Parser() {
+    @Override
+    public long parseValue(BytesRef term) {
+      return LegacyNumericUtils.prefixCodedToLong(term);
+    }
+    @Override
+    public String toString() { 
+      return FieldCache.class.getName()+".LEGACY_LONG_PARSER"; 
+    }
+    
+    @Override
+    public TermsEnum termsEnum(Terms terms) throws IOException {
+      return LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator());
+    }
+  };
+
+  /**
+   * A parser instance for double values encoded with {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
+   * via {@link org.apache.lucene.document.LegacyDoubleField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
+   * @deprecated Index with points and use {@link #DOUBLE_POINT_PARSER} instead.
+   */
+  @Deprecated
+  public static final Parser LEGACY_DOUBLE_PARSER = new Parser() {
+    @Override
+    public long parseValue(BytesRef term) {
+      long val = LegacyNumericUtils.prefixCodedToLong(term);
+      if (val<0) val ^= 0x7fffffffffffffffL;
+      return val;
+    }
+    @Override
+    public String toString() { 
+      return FieldCache.class.getName()+".LEGACY_DOUBLE_PARSER"; 
+    }
+    
+    @Override
+    public TermsEnum termsEnum(Terms terms) throws IOException {
+      return LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator());
+    }
+  };
+  
+  /** Checks the internal cache for an appropriate entry, and if none is found,
+   *  reads the terms/points in <code>field</code> and returns a bit set the size of
+   *  <code>reader.maxDoc()</code>, with a bit turned on for each docid that 
+   *  has a value for this field.
+   *  @param parser May be {@code null} if coming from the inverted index, otherwise
+   *                can be a {@link PointParser} to compute from point values.
+   */
+  public Bits getDocsWithField(LeafReader reader, String field, Parser parser) throws IOException;
+
+  /**
+   * Returns a {@link NumericDocValues} over the values found in documents in the given
+   * field. If the field was indexed as {@link NumericDocValuesField}, it simply
+   * uses {@link org.apache.lucene.index.LeafReader#getNumericDocValues(String)} to read the values.
+   * Otherwise, it checks the internal cache for an appropriate entry, and if
+   * none is found, reads the terms/points in <code>field</code> as longs and returns
+   * an array of size <code>reader.maxDoc()</code> of the value each document
+   * has in the given field.
+   * 
+   * @param reader
+   *          Used to get field values.
+   * @param field
+   *          Which field contains the longs.
+   * @param parser
+   *          Computes long for string values. May be {@code null} if the
+   *          requested field was indexed as {@link NumericDocValuesField} or
+   *          {@link org.apache.lucene.document.LegacyLongField}.
+   * @param setDocsWithField
+   *          If true then {@link #getDocsWithField} will also be computed and
+   *          stored in the FieldCache.
+   * @return The values in the given field for each document.
+   * @throws IOException
+   *           If any error occurs.
+   */
+  public NumericDocValues getNumerics(LeafReader reader, String field, Parser parser, boolean setDocsWithField) throws IOException;
+  
+  /** Checks the internal cache for an appropriate entry, and if none
+   * is found, reads the term values in <code>field</code>
+   * and returns a {@link BinaryDocValues} instance, providing a
+   * method to retrieve the term (as a BytesRef) per document.
+   * @param reader  Used to get field values.
+   * @param field   Which field contains the strings.
+   * @param setDocsWithField  If true then {@link #getDocsWithField} will
+   *        also be computed and stored in the FieldCache.
+   * @return The values in the given field for each document.
+   * @throws IOException  If any error occurs.
+   */
+  public BinaryDocValues getTerms(LeafReader reader, String field, boolean setDocsWithField) throws IOException;
+
+  /** Expert: just like {@link #getTerms(org.apache.lucene.index.LeafReader,String,boolean)},
+   *  but you can specify whether more RAM should be consumed in exchange for
+   *  faster lookups (default is "true").  Note that the
+   *  first call for a given reader and field "wins",
+   *  subsequent calls will share the same cache entry. */
+  public BinaryDocValues getTerms(LeafReader reader, String field, boolean setDocsWithField, float acceptableOverheadRatio) throws IOException;
+
+  /** Checks the internal cache for an appropriate entry, and if none
+   * is found, reads the term values in <code>field</code>
+   * and returns a {@link SortedDocValues} instance,
+   * providing methods to retrieve sort ordinals and terms
+   * (as a BytesRef) per document.
+   * @param reader  Used to get field values.
+   * @param field   Which field contains the strings.
+   * @return The values in the given field for each document.
+   * @throws IOException  If any error occurs.
+   */
+  public SortedDocValues getTermsIndex(LeafReader reader, String field) throws IOException;
+
+  /** Expert: just like {@link
+   *  #getTermsIndex(org.apache.lucene.index.LeafReader,String)}, but you can specify
+   *  whether more RAM should be consumed in exchange for
+   *  faster lookups (default is "true").  Note that the
+   *  first call for a given reader and field "wins",
+   *  subsequent calls will share the same cache entry. */
+  public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException;
+
+  /** Can be passed to {@link #getDocTermOrds} to filter for 32-bit numeric terms */
+  public static final BytesRef INT32_TERM_PREFIX = new BytesRef(new byte[] { LegacyNumericUtils.SHIFT_START_INT });
+  /** Can be passed to {@link #getDocTermOrds} to filter for 64-bit numeric terms */
+  public static final BytesRef INT64_TERM_PREFIX = new BytesRef(new byte[] { LegacyNumericUtils.SHIFT_START_LONG });
+  
+  /**
+   * Checks the internal cache for an appropriate entry, and if none is found, reads the term values
+   * in <code>field</code> and returns a {@link DocTermOrds} instance, providing a method to retrieve
+   * the terms (as ords) per document.
+   *
+   * @param reader  Used to build a {@link DocTermOrds} instance
+   * @param field   Which field contains the strings.
+   * @param prefix  prefix for a subset of the terms which should be uninverted. Can be null or
+   *                {@link #INT32_TERM_PREFIX} or {@link #INT64_TERM_PREFIX}
+   *                
+   * @return a {@link DocTermOrds} instance
+   * @throws IOException  If any error occurs.
+   */
+  public SortedSetDocValues getDocTermOrds(LeafReader reader, String field, BytesRef prefix) throws IOException;
+
+  /**
+   * EXPERT: A unique Identifier/Description for each item in the FieldCache. 
+   * Can be useful for logging/debugging.
+   * @lucene.experimental
+   */
+  public final class CacheEntry {
+
+    private final Object readerKey;
+    private final String fieldName;
+    private final Class<?> cacheType;
+    private final Object custom;
+    private final Accountable value;
+
+    public CacheEntry(Object readerKey, String fieldName,
+                      Class<?> cacheType,
+                      Object custom,
+                      Accountable value) {
+      this.readerKey = readerKey;
+      this.fieldName = fieldName;
+      this.cacheType = cacheType;
+      this.custom = custom;
+      this.value = value;
+    }
+
+    public Object getReaderKey() {
+      return readerKey;
+    }
+
+    public String getFieldName() {
+      return fieldName;
+    }
+
+    public Class<?> getCacheType() {
+      return cacheType;
+    }
+
+    public Object getCustom() {
+      return custom;
+    }
+
+    public Object getValue() {
+      return value;
+    }
+
+    /**
+     * The estimated size of the value, computed from its reported RAM usage
+     * and formatted as a human-readable string.
+     */
+    public String getEstimatedSize() {
+      long bytesUsed = value == null ? 0L : value.ramBytesUsed();
+      return RamUsageEstimator.humanReadableUnits(bytesUsed);
+    }
+    
+    @Override
+    public String toString() {
+      StringBuilder b = new StringBuilder(250);
+      b.append("'").append(getReaderKey()).append("'=>");
+      b.append("'").append(getFieldName()).append("',");
+      b.append(getCacheType()).append(",").append(getCustom());
+      b.append("=>").append(getValue().getClass().getName()).append("#");
+      b.append(System.identityHashCode(getValue()));
+      
+      String s = getEstimatedSize();
+      b.append(" (size =~ ").append(s).append(')');
+
+      return b.toString();
+    }
+  }
+  
+  /**
+   * EXPERT: Generates an array of CacheEntry objects representing all items 
+   * currently in the FieldCache.
+   * <p>
+   * NOTE: These CacheEntry objects maintain a strong reference to the 
+   * Cached Values.  Maintaining references to a CacheEntry after the reader 
+   * associated with it has been garbage collected will prevent the Value itself
+   * from being garbage collected when the Cache drops the WeakReference.
+   * </p>
+   * @lucene.experimental
+   */
+  public CacheEntry[] getCacheEntries();
+
+  /**
+   * <p>
+   * EXPERT: Instructs the FieldCache to forcibly expunge all entries 
+   * from the underlying caches.  This is intended only to be used for 
+   * test methods as a way to ensure a known base state of the Cache 
+   * (without needing to rely on GC to free WeakReferences).  
+   * It should not be relied on for "Cache maintenance" in general 
+   * application code.
+   * </p>
+   * @lucene.experimental
+   */
+  public void purgeAllCaches();
+
+  /**
+   * Expert: drops all cache entries associated with this
+   * reader {@link IndexReader#getCoreCacheKey}.  NOTE: this cache key must
+   * precisely match the reader that the cache entry is
+   * keyed on. If you pass a top-level reader, it usually
+   * will have no effect as Lucene now caches at the segment
+   * reader level.
+   */
+  public void purgeByCacheKey(Object coreCacheKey);
+
+  /**
+   * If non-null, FieldCacheImpl will warn whenever
+   * entries are created that are not sane according to
+   * {@link FieldCacheSanityChecker}.
+   */
+  public void setInfoStream(PrintStream stream);
+
+  /** counterpart of {@link #setInfoStream(PrintStream)} */
+  public PrintStream getInfoStream();
+}
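
Likewise, a minimal sketch of how this package-private cache is exercised (an editor's illustration, not part of this commit): callers such as the UninvertingReader moved in this commit live in the same org.apache.solr.uninverting package, and the field names "price" and "category" are assumptions.

package org.apache.solr.uninverting;

import java.io.IOException;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;

class FieldCacheExample {
  static void demo(LeafReader leafReader) throws IOException {
    // Uninvert an IntPoint-indexed field into a NumericDocValues view; the point
    // parser decodes the sortable byte encoding written at index time.
    NumericDocValues prices =
        FieldCache.DEFAULT.getNumerics(leafReader, "price", FieldCache.INT_POINT_PARSER, false);
    long firstPrice = prices.get(0);                       // value for docID 0; 0 if the doc has none

    // Multi-valued string field -> per-document term ords, backed by DocTermOrds above.
    SortedSetDocValues categories =
        FieldCache.DEFAULT.getDocTermOrds(leafReader, "category", null);

    System.out.println(firstPrice + " / " + categories.getValueCount() + " distinct categories");
  }
}

The first call for a given reader and field populates the cache; later calls with the same key share the entry, which is why the javadocs above stress that the first call "wins".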

