lucene-commits mailing list archives

From: yo...@apache.org
Subject: [3/6] lucene-solr:branch_6x: SOLR-9160: Sync 6x and 7.0 move of UninvertingReader, SlowCompositeReaderWrapper for Solr (LUCENE-7283)
Date: Fri, 27 May 2016 16:42:21 GMT
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5525f429/solr/core/src/test/org/apache/solr/uninverting/TestDocTermOrds.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/uninverting/TestDocTermOrds.java b/solr/core/src/test/org/apache/solr/uninverting/TestDocTermOrds.java
new file mode 100644
index 0000000..f1627a6
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/uninverting/TestDocTermOrds.java
@@ -0,0 +1,681 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.uninverting;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.LegacyIntField;
+import org.apache.lucene.document.LegacyLongField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum.SeekStatus;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LegacyNumericUtils;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.TestUtil;
+import org.apache.solr.index.SlowCompositeReaderWrapper;
+
+// TODO:
+//   - test w/ del docs
+//   - test prefix
+//   - test w/ cutoff
+//   - crank docs way up so we get some merging sometimes
+
+public class TestDocTermOrds extends LuceneTestCase {
+
+  public void testEmptyIndex() throws IOException {
+    final Directory dir = newDirectory();
+    final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
+    iw.close();
+    
+    final DirectoryReader ir = DirectoryReader.open(dir);
+    TestUtil.checkReader(ir);
+    
+    final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
+    TestUtil.checkReader(composite);
+    
+    // check the leaves
+    // (normally there are none for an empty index, so this is really just
+    // future-proofing in case that changes for some reason)
+    for (LeafReaderContext rc : ir.leaves()) {
+      final LeafReader r = rc.reader();
+      final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(), "any_field");
+      assertNull("OrdTermsEnum should be null (leaf)", dto.getOrdTermsEnum(r));
+      assertEquals("iterator should be empty (leaf)", 0, dto.iterator(r).getValueCount());
+    }
+
+    // check the composite 
+    final DocTermOrds dto = new DocTermOrds(composite, composite.getLiveDocs(), "any_field");
+    assertNull("OrdTermsEnum should be null (composite)", dto.getOrdTermsEnum(composite));
+    assertEquals("iterator should be empty (composite)", 0, dto.iterator(composite).getValueCount());
+
+    ir.close();
+    dir.close();
+  }
+
+  public void testSimple() throws Exception {
+    Directory dir = newDirectory();
+    final RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
+    Document doc = new Document();
+    Field field = newTextField("field", "", Field.Store.NO);
+    doc.add(field);
+    field.setStringValue("a b c");
+    w.addDocument(doc);
+
+    field.setStringValue("d e f");
+    w.addDocument(doc);
+
+    field.setStringValue("a f");
+    w.addDocument(doc);
+    
+    final IndexReader r = w.getReader();
+    w.close();
+
+    final LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
+    TestUtil.checkReader(ar);
+    final DocTermOrds dto = new DocTermOrds(ar, ar.getLiveDocs(), "field");
+    SortedSetDocValues iter = dto.iterator(ar);
+    
+    iter.setDocument(0);
+    assertEquals(0, iter.nextOrd());
+    assertEquals(1, iter.nextOrd());
+    assertEquals(2, iter.nextOrd());
+    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
+    
+    iter.setDocument(1);
+    assertEquals(3, iter.nextOrd());
+    assertEquals(4, iter.nextOrd());
+    assertEquals(5, iter.nextOrd());
+    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
+
+    iter.setDocument(2);
+    assertEquals(0, iter.nextOrd());
+    assertEquals(5, iter.nextOrd());
+    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
+
+    r.close();
+    dir.close();
+  }
+
+  public void testRandom() throws Exception {
+    Directory dir = newDirectory();
+
+    final int NUM_TERMS = atLeast(20);
+    final Set<BytesRef> terms = new HashSet<>();
+    while(terms.size() < NUM_TERMS) {
+      final String s = TestUtil.randomRealisticUnicodeString(random());
+      //final String s = _TestUtil.randomSimpleString(random);
+      if (s.length() > 0) {
+        terms.add(new BytesRef(s));
+      }
+    }
+    final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]);
+    Arrays.sort(termsArray);
+    
+    final int NUM_DOCS = atLeast(100);
+
+    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
+
+    // Sometimes swap in a codec that implements ord():
+    if (random().nextInt(10) == 7) {
+      // Make sure terms index has ords:
+      Codec codec = TestUtil.alwaysPostingsFormat(TestUtil.getPostingsFormatWithOrds(random()));
+      conf.setCodec(codec);
+    }
+    
+    final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf);
+
+    final int[][] idToOrds = new int[NUM_DOCS][];
+    final Set<Integer> ordsForDocSet = new HashSet<>();
+
+    for(int id=0;id<NUM_DOCS;id++) {
+      Document doc = new Document();
+
+      doc.add(new LegacyIntField("id", id, Field.Store.YES));
+      
+      final int termCount = TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER);
+      while(ordsForDocSet.size() < termCount) {
+        ordsForDocSet.add(random().nextInt(termsArray.length));
+      }
+      final int[] ordsForDoc = new int[termCount];
+      int upto = 0;
+      if (VERBOSE) {
+        System.out.println("TEST: doc id=" + id);
+      }
+      for(int ord : ordsForDocSet) {
+        ordsForDoc[upto++] = ord;
+        Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO);
+        if (VERBOSE) {
+          System.out.println("  f=" + termsArray[ord].utf8ToString());
+        }
+        doc.add(field);
+      }
+      ordsForDocSet.clear();
+      Arrays.sort(ordsForDoc);
+      idToOrds[id] = ordsForDoc;
+      w.addDocument(doc);
+    }
+    
+    final DirectoryReader r = w.getReader();
+    w.close();
+
+    if (VERBOSE) {
+      System.out.println("TEST: reader=" + r);
+    }
+
+    for(LeafReaderContext ctx : r.leaves()) {
+      if (VERBOSE) {
+        System.out.println("\nTEST: sub=" + ctx.reader());
+      }
+      verify(ctx.reader(), idToOrds, termsArray, null);
+    }
+
+    // Also test top-level reader: its enum does not support
+    // ord, so this forces the OrdWrapper to run:
+    if (VERBOSE) {
+      System.out.println("TEST: top reader");
+    }
+    LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
+    TestUtil.checkReader(slowR);
+    verify(slowR, idToOrds, termsArray, null);
+
+    FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheKey());
+
+    r.close();
+    dir.close();
+  }
+
+  public void testRandomWithPrefix() throws Exception {
+    Directory dir = newDirectory();
+
+    final Set<String> prefixes = new HashSet<>();
+    final int numPrefix = TestUtil.nextInt(random(), 2, 7);
+    if (VERBOSE) {
+      System.out.println("TEST: use " + numPrefix + " prefixes");
+    }
+    while(prefixes.size() < numPrefix) {
+      prefixes.add(TestUtil.randomRealisticUnicodeString(random()));
+      //prefixes.add(_TestUtil.randomSimpleString(random));
+    }
+    final String[] prefixesArray = prefixes.toArray(new String[prefixes.size()]);
+
+    final int NUM_TERMS = atLeast(20);
+    final Set<BytesRef> terms = new HashSet<>();
+    while(terms.size() < NUM_TERMS) {
+      final String s = prefixesArray[random().nextInt(prefixesArray.length)] + TestUtil.randomRealisticUnicodeString(random());
+      //final String s = prefixesArray[random.nextInt(prefixesArray.length)] + _TestUtil.randomSimpleString(random);
+      if (s.length() > 0) {
+        terms.add(new BytesRef(s));
+      }
+    }
+    final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]);
+    Arrays.sort(termsArray);
+    
+    final int NUM_DOCS = atLeast(100);
+
+    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
+
+    // Sometimes swap in a codec that implements ord():
+    if (random().nextInt(10) == 7) {
+      Codec codec = TestUtil.alwaysPostingsFormat(TestUtil.getPostingsFormatWithOrds(random()));
+      conf.setCodec(codec);
+    }
+    
+    final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf);
+
+    final int[][] idToOrds = new int[NUM_DOCS][];
+    final Set<Integer> ordsForDocSet = new HashSet<>();
+
+    for(int id=0;id<NUM_DOCS;id++) {
+      Document doc = new Document();
+
+      doc.add(new LegacyIntField("id", id, Field.Store.YES));
+      
+      final int termCount = TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER);
+      while(ordsForDocSet.size() < termCount) {
+        ordsForDocSet.add(random().nextInt(termsArray.length));
+      }
+      final int[] ordsForDoc = new int[termCount];
+      int upto = 0;
+      if (VERBOSE) {
+        System.out.println("TEST: doc id=" + id);
+      }
+      for(int ord : ordsForDocSet) {
+        ordsForDoc[upto++] = ord;
+        Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO);
+        if (VERBOSE) {
+          System.out.println("  f=" + termsArray[ord].utf8ToString());
+        }
+        doc.add(field);
+      }
+      ordsForDocSet.clear();
+      Arrays.sort(ordsForDoc);
+      idToOrds[id] = ordsForDoc;
+      w.addDocument(doc);
+    }
+    
+    final DirectoryReader r = w.getReader();
+    w.close();
+
+    if (VERBOSE) {
+      System.out.println("TEST: reader=" + r);
+    }
+    
+    LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
+    TestUtil.checkReader(slowR);
+    for(String prefix : prefixesArray) {
+
+      final BytesRef prefixRef = prefix == null ? null : new BytesRef(prefix);
+
+      final int[][] idToOrdsPrefix = new int[NUM_DOCS][];
+      for(int id=0;id<NUM_DOCS;id++) {
+        final int[] docOrds = idToOrds[id];
+        final List<Integer> newOrds = new ArrayList<>();
+        for(int ord : idToOrds[id]) {
+          if (StringHelper.startsWith(termsArray[ord], prefixRef)) {
+            newOrds.add(ord);
+          }
+        }
+        final int[] newOrdsArray = new int[newOrds.size()];
+        int upto = 0;
+        for(int ord : newOrds) {
+          newOrdsArray[upto++] = ord;
+        }
+        idToOrdsPrefix[id] = newOrdsArray;
+      }
+
+      for(LeafReaderContext ctx : r.leaves()) {
+        if (VERBOSE) {
+          System.out.println("\nTEST: sub=" + ctx.reader());
+        }
+        verify(ctx.reader(), idToOrdsPrefix, termsArray, prefixRef);
+      }
+
+      // Also test top-level reader: its enum does not support
+      // ord, so this forces the OrdWrapper to run:
+      if (VERBOSE) {
+        System.out.println("TEST: top reader");
+      }
+      verify(slowR, idToOrdsPrefix, termsArray, prefixRef);
+    }
+
+    FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheKey());
+
+    r.close();
+    dir.close();
+  }
+
+  private void verify(LeafReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef) throws Exception {
+
+    final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(),
+                                            "field",
+                                            prefixRef,
+                                            Integer.MAX_VALUE,
+                                            TestUtil.nextInt(random(), 2, 10));
+                                            
+
+    final NumericDocValues docIDToID = FieldCache.DEFAULT.getNumerics(r, "id", FieldCache.LEGACY_INT_PARSER, false);
+    /*
+      for(int docID=0;docID<subR.maxDoc();docID++) {
+      System.out.println("  docID=" + docID + " id=" + docIDToID[docID]);
+      }
+    */
+
+    if (VERBOSE) {
+      System.out.println("TEST: verify prefix=" + (prefixRef==null ? "null" : prefixRef.utf8ToString()));
+      System.out.println("TEST: all TERMS:");
+      TermsEnum allTE = MultiFields.getTerms(r, "field").iterator();
+      int ord = 0;
+      while(allTE.next() != null) {
+        System.out.println("  ord=" + (ord++) + " term=" + allTE.term().utf8ToString());
+      }
+    }
+
+    //final TermsEnum te = subR.fields().terms("field").iterator();
+    final TermsEnum te = dto.getOrdTermsEnum(r);
+    if (dto.numTerms() == 0) {
+      if (prefixRef == null) {
+        assertNull(MultiFields.getTerms(r, "field"));
+      } else {
+        Terms terms = MultiFields.getTerms(r, "field");
+        if (terms != null) {
+          TermsEnum termsEnum = terms.iterator();
+          TermsEnum.SeekStatus result = termsEnum.seekCeil(prefixRef);
+          if (result != TermsEnum.SeekStatus.END) {
+            assertFalse("term=" + termsEnum.term().utf8ToString() + " matches prefix=" + prefixRef.utf8ToString(), StringHelper.startsWith(termsEnum.term(), prefixRef));
+          } else {
+            // ok
+          }
+        } else {
+          // ok
+        }
+      }
+      return;
+    }
+
+    if (VERBOSE) {
+      System.out.println("TEST: TERMS:");
+      te.seekExact(0);
+      while(true) {
+        System.out.println("  ord=" + te.ord() + " term=" + te.term().utf8ToString());
+        if (te.next() == null) {
+          break;
+        }
+      }
+    }
+
+    SortedSetDocValues iter = dto.iterator(r);
+    for(int docID=0;docID<r.maxDoc();docID++) {
+      if (VERBOSE) {
+        System.out.println("TEST: docID=" + docID + " of " + r.maxDoc() + " (id=" + docIDToID.get(docID) + ")");
+      }
+      iter.setDocument(docID);
+      final int[] answers = idToOrds[(int) docIDToID.get(docID)];
+      int upto = 0;
+      long ord;
+      while ((ord = iter.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+        te.seekExact(ord);
+        final BytesRef expected = termsArray[answers[upto++]];
+        if (VERBOSE) {
+          System.out.println("  exp=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString());
+        }
+        assertEquals("expected=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString() + " ord=" + ord, expected, te.term());
+      }
+      assertEquals(answers.length, upto);
+    }
+  }
+  
+  public void testBackToTheFuture() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
+    
+    Document doc = new Document();
+    doc.add(newStringField("foo", "bar", Field.Store.NO));
+    iw.addDocument(doc);
+    
+    doc = new Document();
+    doc.add(newStringField("foo", "baz", Field.Store.NO));
+    // we need a second value for a doc, or we don't actually test DocTermOrds!
+    doc.add(newStringField("foo", "car", Field.Store.NO));
+    iw.addDocument(doc);
+    
+    DirectoryReader r1 = DirectoryReader.open(iw);
+    
+    iw.deleteDocuments(new Term("foo", "baz"));
+    DirectoryReader r2 = DirectoryReader.open(iw);
+    
+    FieldCache.DEFAULT.getDocTermOrds(getOnlyLeafReader(r2), "foo", null);
+    
+    SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(getOnlyLeafReader(r1), "foo", null);
+    assertEquals(3, v.getValueCount());
+    v.setDocument(1);
+    assertEquals(1, v.nextOrd());
+    
+    iw.close();
+    r1.close();
+    r2.close();
+    dir.close();
+  }
+  
+  public void testNumericEncoded32() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
+    
+    Document doc = new Document();
+    doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
+    iw.addDocument(doc);
+    
+    doc = new Document();
+    doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
+    doc.add(new LegacyIntField("foo", -3, Field.Store.NO));
+    iw.addDocument(doc);
+    
+    iw.forceMerge(1);
+    iw.close();
+    
+    DirectoryReader ir = DirectoryReader.open(dir);
+    LeafReader ar = getOnlyLeafReader(ir);
+    
+    SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT32_TERM_PREFIX);
+    assertEquals(2, v.getValueCount());
+    
+    v.setDocument(0);
+    assertEquals(1, v.nextOrd());
+    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
+    
+    v.setDocument(1);
+    assertEquals(0, v.nextOrd());
+    assertEquals(1, v.nextOrd());
+    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
+    
+    BytesRef value = v.lookupOrd(0);
+    assertEquals(-3, LegacyNumericUtils.prefixCodedToInt(value));
+    
+    value = v.lookupOrd(1);
+    assertEquals(5, LegacyNumericUtils.prefixCodedToInt(value));
+    
+    ir.close();
+    dir.close();
+  }
+  
+  public void testNumericEncoded64() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
+    
+    Document doc = new Document();
+    doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
+    iw.addDocument(doc);
+    
+    doc = new Document();
+    doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
+    doc.add(new LegacyLongField("foo", -3, Field.Store.NO));
+    iw.addDocument(doc);
+    
+    iw.forceMerge(1);
+    iw.close();
+    
+    DirectoryReader ir = DirectoryReader.open(dir);
+    LeafReader ar = getOnlyLeafReader(ir);
+    
+    SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT64_TERM_PREFIX);
+    assertEquals(2, v.getValueCount());
+    
+    v.setDocument(0);
+    assertEquals(1, v.nextOrd());
+    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
+    
+    v.setDocument(1);
+    assertEquals(0, v.nextOrd());
+    assertEquals(1, v.nextOrd());
+    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
+    
+    BytesRef value = v.lookupOrd(0);
+    assertEquals(-3, LegacyNumericUtils.prefixCodedToLong(value));
+    
+    value = v.lookupOrd(1);
+    assertEquals(5, LegacyNumericUtils.prefixCodedToLong(value));
+    
+    ir.close();
+    dir.close();
+  }
+  
+  public void testSortedTermsEnum() throws IOException {
+    Directory directory = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random());
+    IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
+    iwconfig.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+    
+    Document doc = new Document();
+    doc.add(new StringField("field", "hello", Field.Store.NO));
+    iwriter.addDocument(doc);
+    
+    doc = new Document();
+    doc.add(new StringField("field", "world", Field.Store.NO));
+    // we need a second value for a doc, or we don't actually test DocTermOrds!
+    doc.add(new StringField("field", "hello", Field.Store.NO));
+    iwriter.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new StringField("field", "beer", Field.Store.NO));
+    iwriter.addDocument(doc);
+    iwriter.forceMerge(1);
+    
+    DirectoryReader ireader = iwriter.getReader();
+    iwriter.close();
+
+    LeafReader ar = getOnlyLeafReader(ireader);
+    SortedSetDocValues dv = FieldCache.DEFAULT.getDocTermOrds(ar, "field", null);
+    assertEquals(3, dv.getValueCount());
+    
+    TermsEnum termsEnum = dv.termsEnum();
+    
+    // next()
+    assertEquals("beer", termsEnum.next().utf8ToString());
+    assertEquals(0, termsEnum.ord());
+    assertEquals("hello", termsEnum.next().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    assertEquals("world", termsEnum.next().utf8ToString());
+    assertEquals(2, termsEnum.ord());
+    
+    // seekCeil()
+    assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
+    assertEquals("hello", termsEnum.term().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
+    assertEquals("beer", termsEnum.term().utf8ToString());
+    assertEquals(0, termsEnum.ord());
+    assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));
+    
+    // seekExact()
+    assertTrue(termsEnum.seekExact(new BytesRef("beer")));
+    assertEquals("beer", termsEnum.term().utf8ToString());
+    assertEquals(0, termsEnum.ord());
+    assertTrue(termsEnum.seekExact(new BytesRef("hello")));
+    assertEquals("hello", termsEnum.term().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    assertTrue(termsEnum.seekExact(new BytesRef("world")));
+    assertEquals("world", termsEnum.term().utf8ToString());
+    assertEquals(2, termsEnum.ord());
+    assertFalse(termsEnum.seekExact(new BytesRef("bogus")));
+    
+    // seek(ord)
+    termsEnum.seekExact(0);
+    assertEquals("beer", termsEnum.term().utf8ToString());
+    assertEquals(0, termsEnum.ord());
+    termsEnum.seekExact(1);
+    assertEquals("hello", termsEnum.term().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    termsEnum.seekExact(2);
+    assertEquals("world", termsEnum.term().utf8ToString());
+    assertEquals(2, termsEnum.ord());
+    
+    // lookupTerm(BytesRef) 
+    assertEquals(-1, dv.lookupTerm(new BytesRef("apple")));
+    assertEquals(0, dv.lookupTerm(new BytesRef("beer")));
+    assertEquals(-2, dv.lookupTerm(new BytesRef("car")));
+    assertEquals(1, dv.lookupTerm(new BytesRef("hello")));
+    assertEquals(-3, dv.lookupTerm(new BytesRef("matter")));
+    assertEquals(2, dv.lookupTerm(new BytesRef("world")));
+    assertEquals(-4, dv.lookupTerm(new BytesRef("zany")));
+
+    ireader.close();
+    directory.close();
+  }
+  
+  public void testActuallySingleValued() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwconfig = newIndexWriterConfig(null);
+    iwconfig.setMergePolicy(newLogMergePolicy());
+    IndexWriter iw = new IndexWriter(dir, iwconfig);
+    
+    Document doc = new Document();
+    doc.add(new StringField("foo", "bar", Field.Store.NO));
+    iw.addDocument(doc);
+    
+    doc = new Document();
+    doc.add(new StringField("foo", "baz", Field.Store.NO));
+    iw.addDocument(doc);
+    
+    doc = new Document();
+    iw.addDocument(doc);
+    
+    doc = new Document();
+    doc.add(new StringField("foo", "baz", Field.Store.NO));
+    doc.add(new StringField("foo", "baz", Field.Store.NO));
+    iw.addDocument(doc);
+    
+    iw.forceMerge(1);
+    iw.close();
+    
+    DirectoryReader ir = DirectoryReader.open(dir);
+    LeafReader ar = getOnlyLeafReader(ir);
+    
+    SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", null);
+    assertNotNull(DocValues.unwrapSingleton(v)); // actually a single-valued field
+    assertEquals(2, v.getValueCount());
+    
+    v.setDocument(0);
+    assertEquals(0, v.nextOrd());
+    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
+    
+    v.setDocument(1);
+    assertEquals(1, v.nextOrd());
+    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
+    
+    v.setDocument(2);
+    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
+    
+    v.setDocument(3);
+    assertEquals(1, v.nextOrd());
+    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
+    
+    BytesRef value = v.lookupOrd(0);
+    assertEquals("bar", value.utf8ToString());
+    
+    value = v.lookupOrd(1);
+    assertEquals("baz", value.utf8ToString());
+    
+    ir.close();
+    dir.close();
+  }
+}
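
For orientation, a minimal sketch (not part of the patch) of the uninversion pattern the tests above exercise, using the DocTermOrds and SlowCompositeReaderWrapper APIs exactly as they appear in this file; reader, docID, and the field name are placeholders:

  LeafReader leaf = SlowCompositeReaderWrapper.wrap(reader);  // flatten a composite reader
  DocTermOrds dto = new DocTermOrds(leaf, leaf.getLiveDocs(), "field");
  SortedSetDocValues ords = dto.iterator(leaf);
  ords.setDocument(docID);                                    // position on a single document
  long ord;
  while ((ord = ords.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
    BytesRef term = ords.lookupOrd(ord);                      // resolve the ord back to term bytes
    // ords arrive in increasing term order, with duplicate values collapsed
  }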

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5525f429/solr/core/src/test/org/apache/solr/uninverting/TestFieldCache.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/uninverting/TestFieldCache.java b/solr/core/src/test/org/apache/solr/uninverting/TestFieldCache.java
new file mode 100644
index 0000000..af9ea95
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/uninverting/TestFieldCache.java
@@ -0,0 +1,731 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.uninverting;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.concurrent.CyclicBarrier;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.BinaryDocValuesField;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.DoublePoint;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FloatPoint;
+import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.document.LegacyDoubleField;
+import org.apache.lucene.document.LegacyFloatField;
+import org.apache.lucene.document.LegacyIntField;
+import org.apache.lucene.document.LegacyLongField;
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LogDocMergePolicy;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LegacyNumericUtils;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+import org.apache.solr.index.SlowCompositeReaderWrapper;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+public class TestFieldCache extends LuceneTestCase {
+  private static LeafReader reader;
+  private static int NUM_DOCS;
+  private static int NUM_ORDS;
+  private static String[] unicodeStrings;
+  private static BytesRef[][] multiValued;
+  private static Directory directory;
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    NUM_DOCS = atLeast(500);
+    NUM_ORDS = atLeast(2);
+    directory = newDirectory();
+    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(new LogDocMergePolicy()));
+    long theLong = Long.MAX_VALUE;
+    double theDouble = Double.MAX_VALUE;
+    int theInt = Integer.MAX_VALUE;
+    float theFloat = Float.MAX_VALUE;
+    unicodeStrings = new String[NUM_DOCS];
+    multiValued = new BytesRef[NUM_DOCS][NUM_ORDS];
+    if (VERBOSE) {
+      System.out.println("TEST: setUp");
+    }
+    for (int i = 0; i < NUM_DOCS; i++){
+      Document doc = new Document();
+      doc.add(new LongPoint("theLong", theLong--));
+      doc.add(new DoublePoint("theDouble", theDouble--));
+      doc.add(new IntPoint("theInt", theInt--));
+      doc.add(new FloatPoint("theFloat", theFloat--));
+      if (i%2 == 0) {
+        doc.add(new IntPoint("sparse", i));
+      }
+
+      if (i%2 == 0) {
+        doc.add(new IntPoint("numInt", i));
+      }
+
+      // sometimes skip the field:
+      if (random().nextInt(40) != 17) {
+        unicodeStrings[i] = generateString(i);
+        doc.add(newStringField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES));
+      }
+
+      // sometimes skip the field:
+      if (random().nextInt(10) != 8) {
+        for (int j = 0; j < NUM_ORDS; j++) {
+          String newValue = generateString(i);
+          multiValued[i][j] = new BytesRef(newValue);
+          doc.add(newStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES));
+        }
+        Arrays.sort(multiValued[i]);
+      }
+      writer.addDocument(doc);
+    }
+    writer.forceMerge(1); // this test relies on one segment and docid order
+    IndexReader r = DirectoryReader.open(writer);
+    assertEquals(1, r.leaves().size());
+    reader = r.leaves().get(0).reader();
+    TestUtil.checkReader(reader);
+    writer.close();
+  }
+
+  @AfterClass
+  public static void afterClass() throws Exception {
+    reader.close();
+    reader = null;
+    directory.close();
+    directory = null;
+    unicodeStrings = null;
+    multiValued = null;
+  }
+  
+  public void test() throws IOException {
+    FieldCache cache = FieldCache.DEFAULT;
+    NumericDocValues doubles = cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER, random().nextBoolean());
+    assertSame("Second request to cache return same array", doubles, cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER, random().nextBoolean()));
+    for (int i = 0; i < NUM_DOCS; i++) {
+      assertEquals(Double.doubleToLongBits(Double.MAX_VALUE - i), doubles.get(i));
+    }
+    
+    NumericDocValues longs = cache.getNumerics(reader, "theLong", FieldCache.LONG_POINT_PARSER, random().nextBoolean());
+    assertSame("Second request to cache return same array", longs, cache.getNumerics(reader, "theLong", FieldCache.LONG_POINT_PARSER, random().nextBoolean()));
+    for (int i = 0; i < NUM_DOCS; i++) {
+      assertEquals(Long.MAX_VALUE - i, longs.get(i));
+    }
+
+    NumericDocValues ints = cache.getNumerics(reader, "theInt", FieldCache.INT_POINT_PARSER, random().nextBoolean());
+    assertSame("Second request to cache return same array", ints, cache.getNumerics(reader, "theInt", FieldCache.INT_POINT_PARSER, random().nextBoolean()));
+    for (int i = 0; i < NUM_DOCS; i++) {
+      assertEquals(Integer.MAX_VALUE - i, ints.get(i));
+    }
+    
+    NumericDocValues floats = cache.getNumerics(reader, "theFloat", FieldCache.FLOAT_POINT_PARSER, random().nextBoolean());
+    assertSame("Second request to cache return same array", floats, cache.getNumerics(reader, "theFloat", FieldCache.FLOAT_POINT_PARSER, random().nextBoolean()));
+    for (int i = 0; i < NUM_DOCS; i++) {
+      assertEquals(Float.floatToIntBits(Float.MAX_VALUE - i), floats.get(i));
+    }
+
+    Bits docsWithField = cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER);
+    assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER));
+    assertTrue("docsWithField(theLong) must be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
+    assertTrue("docsWithField(theLong) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
+    for (int i = 0; i < docsWithField.length(); i++) {
+      assertTrue(docsWithField.get(i));
+    }
+    
+    docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
+    assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER));
+    assertFalse("docsWithField(sparse) must not be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
+    assertTrue("docsWithField(sparse) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
+    for (int i = 0; i < docsWithField.length(); i++) {
+      assertEquals(i%2 == 0, docsWithField.get(i));
+    }
+
+    // getTermsIndex
+    SortedDocValues termsIndex = cache.getTermsIndex(reader, "theRandomUnicodeString");
+    for (int i = 0; i < NUM_DOCS; i++) {
+      final String s;
+      final int ord = termsIndex.getOrd(i);
+      if (ord == -1) {
+        s = null;
+      } else {
+        s = termsIndex.lookupOrd(ord).utf8ToString();
+      }
+      assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s));
+    }
+
+    int nTerms = termsIndex.getValueCount();
+
+    TermsEnum tenum = termsIndex.termsEnum();
+    for (int i=0; i<nTerms; i++) {
+      BytesRef val1 = BytesRef.deepCopyOf(tenum.next());
+      final BytesRef val = termsIndex.lookupOrd(i);
+      // System.out.println("i="+i);
+      assertEquals(val, val1);
+    }
+
+    // seek the enum around (note this isn't a great test here)
+    int num = atLeast(100);
+    for (int i = 0; i < num; i++) {
+      int k = random().nextInt(nTerms);
+      final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(k));
+      assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
+      assertEquals(val, tenum.term());
+    }
+
+    for(int i=0;i<nTerms;i++) {
+      final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(i));
+      assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
+      assertEquals(val, tenum.term());
+    }
+
+    // test bad field
+    termsIndex = cache.getTermsIndex(reader, "bogusfield");
+
+    // getTerms
+    BinaryDocValues terms = cache.getTerms(reader, "theRandomUnicodeString", true);
+    Bits bits = cache.getDocsWithField(reader, "theRandomUnicodeString", null);
+    for (int i = 0; i < NUM_DOCS; i++) {
+      final String s;
+      if (!bits.get(i)) {
+        s = null;
+      } else {
+        s = terms.get(i).utf8ToString();
+      }
+      assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s));
+    }
+
+    // test bad field
+    terms = cache.getTerms(reader, "bogusfield", false);
+
+    // getDocTermOrds
+    SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
+    int numEntries = cache.getCacheEntries().length;
+    // ask for it again, and check that we didn't create any additional entries:
+    termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
+    assertEquals(numEntries, cache.getCacheEntries().length);
+
+    for (int i = 0; i < NUM_DOCS; i++) {
+      termOrds.setDocument(i);
+      // This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
+      List<BytesRef> values = new ArrayList<>(new LinkedHashSet<>(Arrays.asList(multiValued[i])));
+      for (BytesRef v : values) {
+        if (v == null) {
+          // why does this test use null values... instead of an empty list: confusing
+          break;
+        }
+        long ord = termOrds.nextOrd();
+        assert ord != SortedSetDocValues.NO_MORE_ORDS;
+        BytesRef scratch = termOrds.lookupOrd(ord);
+        assertEquals(v, scratch);
+      }
+      assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd());
+    }
+
+    // test bad field
+    termOrds = cache.getDocTermOrds(reader, "bogusfield", null);
+    assertTrue(termOrds.getValueCount() == 0);
+
+    FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
+  }
+
+  public void testEmptyIndex() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(500));
+    writer.close();
+    IndexReader r = DirectoryReader.open(dir);
+    LeafReader reader = SlowCompositeReaderWrapper.wrap(r);
+    TestUtil.checkReader(reader);
+    FieldCache.DEFAULT.getTerms(reader, "foobar", true);
+    FieldCache.DEFAULT.getTermsIndex(reader, "foobar");
+    FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
+    r.close();
+    dir.close();
+  }
+
+  private static String generateString(int i) {
+    String s = null;
+    if (i > 0 && random().nextInt(3) == 1) {
+      // reuse past string -- try to find one that's not null
+      for(int iter = 0; iter < 10 && s == null;iter++) {
+        s = unicodeStrings[random().nextInt(i)];
+      }
+      if (s == null) {
+        s = TestUtil.randomUnicodeString(random());
+      }
+    } else {
+      s = TestUtil.randomUnicodeString(random());
+    }
+    return s;
+  }
+
+  public void testDocsWithField() throws Exception {
+    FieldCache cache = FieldCache.DEFAULT;
+    cache.purgeAllCaches();
+    assertEquals(0, cache.getCacheEntries().length);
+    cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER, true);
+
+    // The double[] takes one slot, and docsWithField should also
+    // have been populated:
+    assertEquals(2, cache.getCacheEntries().length);
+    Bits bits = cache.getDocsWithField(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER);
+
+    // No new entries should appear:
+    assertEquals(2, cache.getCacheEntries().length);
+    assertTrue(bits instanceof Bits.MatchAllBits);
+
+    NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.INT_POINT_PARSER, true);
+    assertEquals(4, cache.getCacheEntries().length);
+    Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
+    assertEquals(4, cache.getCacheEntries().length);
+    for (int i = 0; i < docsWithField.length(); i++) {
+      if (i%2 == 0) {
+        assertTrue(docsWithField.get(i));
+        assertEquals(i, ints.get(i));
+      } else {
+        assertFalse(docsWithField.get(i));
+      }
+    }
+
+    NumericDocValues numInts = cache.getNumerics(reader, "numInt", FieldCache.INT_POINT_PARSER, random().nextBoolean());
+    docsWithField = cache.getDocsWithField(reader, "numInt", FieldCache.INT_POINT_PARSER);
+    for (int i = 0; i < docsWithField.length(); i++) {
+      if (i%2 == 0) {
+        assertTrue(docsWithField.get(i));
+        assertEquals(i, numInts.get(i));
+      } else {
+        assertFalse(docsWithField.get(i));
+      }
+    }
+  }
+  
+  public void testGetDocsWithFieldThreadSafety() throws Exception {
+    final FieldCache cache = FieldCache.DEFAULT;
+    cache.purgeAllCaches();
+
+    int NUM_THREADS = 3;
+    Thread[] threads = new Thread[NUM_THREADS];
+    final AtomicBoolean failed = new AtomicBoolean();
+    final AtomicInteger iters = new AtomicInteger();
+    final int NUM_ITER = 200 * RANDOM_MULTIPLIER;
+    final CyclicBarrier restart = new CyclicBarrier(NUM_THREADS,
+                                                    new Runnable() {
+                                                      @Override
+                                                      public void run() {
+                                                        cache.purgeAllCaches();
+                                                        iters.incrementAndGet();
+                                                      }
+                                                    });
+    for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
+      threads[threadIDX] = new Thread() {
+          @Override
+          public void run() {
+
+            try {
+              while(!failed.get()) {
+                final int op = random().nextInt(3);
+                if (op == 0) {
+                  // Purge all caches & resume, once all
+                  // threads get here:
+                  restart.await();
+                  if (iters.get() >= NUM_ITER) {
+                    break;
+                  }
+                } else if (op == 1) {
+                  Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
+                  for (int i = 0; i < docsWithField.length(); i++) {
+                    assertEquals(i%2 == 0, docsWithField.get(i));
+                  }
+                } else {
+                  NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.INT_POINT_PARSER, true);
+                  Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
+                  for (int i = 0; i < docsWithField.length(); i++) {
+                    if (i%2 == 0) {
+                      assertTrue(docsWithField.get(i));
+                      assertEquals(i, ints.get(i));
+                    } else {
+                      assertFalse(docsWithField.get(i));
+                    }
+                  }
+                }
+              }
+            } catch (Throwable t) {
+              failed.set(true);
+              restart.reset();
+              throw new RuntimeException(t);
+            }
+          }
+        };
+      threads[threadIDX].start();
+    }
+
+    for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
+      threads[threadIDX].join();
+    }
+    assertFalse(failed.get());
+  }
+  
+  public void testDocValuesIntegration() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(null);
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    Document doc = new Document();
+    doc.add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
+    doc.add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
+    doc.add(new NumericDocValuesField("numeric", 42));
+    doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
+    doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
+    iw.addDocument(doc);
+    DirectoryReader ir = iw.getReader();
+    iw.close();
+    LeafReader ar = getOnlyLeafReader(ir);
+    
+    // Binary type: can be retrieved via getTerms()
+    expectThrows(IllegalStateException.class, () -> {
+      FieldCache.DEFAULT.getNumerics(ar, "binary", FieldCache.INT_POINT_PARSER, false);
+    });
+    
+    BinaryDocValues binary = FieldCache.DEFAULT.getTerms(ar, "binary", true);
+    final BytesRef term = binary.get(0);
+    assertEquals("binary value", term.utf8ToString());
+    
+    expectThrows(IllegalStateException.class, () -> {
+      FieldCache.DEFAULT.getTermsIndex(ar, "binary");
+    });
+    
+    expectThrows(IllegalStateException.class, () -> {
+      FieldCache.DEFAULT.getDocTermOrds(ar, "binary", null);
+    });
+    
+    expectThrows(IllegalStateException.class, () -> {
+      new DocTermOrds(ar, null, "binary");
+    });
+    
+    Bits bits = FieldCache.DEFAULT.getDocsWithField(ar, "binary", null);
+    assertTrue(bits.get(0));
+    
+    // Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
+    expectThrows(IllegalStateException.class, () -> {
+      FieldCache.DEFAULT.getNumerics(ar, "sorted", FieldCache.INT_POINT_PARSER, false);
+    });
+    
+    expectThrows(IllegalStateException.class, () -> {
+      new DocTermOrds(ar, null, "sorted");
+    });
+    
+    binary = FieldCache.DEFAULT.getTerms(ar, "sorted", true);
+    BytesRef scratch = binary.get(0);
+    assertEquals("sorted value", scratch.utf8ToString());
+    
+    SortedDocValues sorted = FieldCache.DEFAULT.getTermsIndex(ar, "sorted");
+    assertEquals(0, sorted.getOrd(0));
+    assertEquals(1, sorted.getValueCount());
+    scratch = sorted.get(0);
+    assertEquals("sorted value", scratch.utf8ToString());
+    
+    SortedSetDocValues sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sorted", null);
+    sortedSet.setDocument(0);
+    assertEquals(0, sortedSet.nextOrd());
+    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
+    assertEquals(1, sortedSet.getValueCount());
+    
+    bits = FieldCache.DEFAULT.getDocsWithField(ar, "sorted", null);
+    assertTrue(bits.get(0));
+    
+    // Numeric type: can be retrieved via getInts() and so on
+    NumericDocValues numeric = FieldCache.DEFAULT.getNumerics(ar, "numeric", FieldCache.INT_POINT_PARSER, false);
+    assertEquals(42, numeric.get(0));
+    
+    expectThrows(IllegalStateException.class, () -> {
+      FieldCache.DEFAULT.getTerms(ar, "numeric", true);
+    });
+    
+    expectThrows(IllegalStateException.class, () -> {
+      FieldCache.DEFAULT.getTermsIndex(ar, "numeric");
+    });
+    
+    expectThrows(IllegalStateException.class, () -> {
+      FieldCache.DEFAULT.getDocTermOrds(ar, "numeric", null);
+    });
+    
+    expectThrows(IllegalStateException.class, () -> {
+      new DocTermOrds(ar, null, "numeric");
+    });
+    
+    bits = FieldCache.DEFAULT.getDocsWithField(ar, "numeric", null);
+    assertTrue(bits.get(0));
+    
+    // SortedSet type: can be retrieved via getDocTermOrds() 
+    expectThrows(IllegalStateException.class, () -> {
+      FieldCache.DEFAULT.getNumerics(ar, "sortedset", FieldCache.INT_POINT_PARSER, false);
+    });
+    
+    expectThrows(IllegalStateException.class, () -> {
+      FieldCache.DEFAULT.getTerms(ar, "sortedset", true);
+    });
+    
+    expectThrows(IllegalStateException.class, () -> {
+      FieldCache.DEFAULT.getTermsIndex(ar, "sortedset");
+    });
+    
+    expectThrows(IllegalStateException.class, () -> {
+      new DocTermOrds(ar, null, "sortedset");
+    });
+    
+    sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sortedset", null);
+    sortedSet.setDocument(0);
+    assertEquals(0, sortedSet.nextOrd());
+    assertEquals(1, sortedSet.nextOrd());
+    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
+    assertEquals(2, sortedSet.getValueCount());
+    
+    bits = FieldCache.DEFAULT.getDocsWithField(ar, "sortedset", null);
+    assertTrue(bits.get(0));
+    
+    ir.close();
+    dir.close();
+  }
+  
+  public void testNonexistantFields() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+    Document doc = new Document();
+    iw.addDocument(doc);
+    DirectoryReader ir = iw.getReader();
+    iw.close();
+    
+    LeafReader ar = getOnlyLeafReader(ir);
+    
+    final FieldCache cache = FieldCache.DEFAULT;
+    cache.purgeAllCaches();
+    assertEquals(0, cache.getCacheEntries().length);
+    
+    NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER, true);
+    assertEquals(0, ints.get(0));
+    
+    NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER, true);
+    assertEquals(0, longs.get(0));
+    
+    NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER, true);
+    assertEquals(0, floats.get(0));
+    
+    NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER, true);
+    assertEquals(0, doubles.get(0));
+    
+    BinaryDocValues binaries = cache.getTerms(ar, "bogusterms", true);
+    BytesRef scratch = binaries.get(0);
+    assertEquals(0, scratch.length);
+    
+    SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
+    assertEquals(-1, sorted.getOrd(0));
+    scratch = sorted.get(0);
+    assertEquals(0, scratch.length);
+    
+    SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
+    sortedSet.setDocument(0);
+    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
+    
+    Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
+    assertFalse(bits.get(0));
+    
+    // check that we cached nothing
+    assertEquals(0, cache.getCacheEntries().length);
+    ir.close();
+    dir.close();
+  }
+  
+  public void testNonIndexedFields() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+    Document doc = new Document();
+    doc.add(new StoredField("bogusbytes", "bogus"));
+    doc.add(new StoredField("bogusshorts", "bogus"));
+    doc.add(new StoredField("bogusints", "bogus"));
+    doc.add(new StoredField("boguslongs", "bogus"));
+    doc.add(new StoredField("bogusfloats", "bogus"));
+    doc.add(new StoredField("bogusdoubles", "bogus"));
+    doc.add(new StoredField("bogusterms", "bogus"));
+    doc.add(new StoredField("bogustermsindex", "bogus"));
+    doc.add(new StoredField("bogusmultivalued", "bogus"));
+    doc.add(new StoredField("bogusbits", "bogus"));
+    iw.addDocument(doc);
+    DirectoryReader ir = iw.getReader();
+    iw.close();
+    
+    LeafReader ar = getOnlyLeafReader(ir);
+    
+    final FieldCache cache = FieldCache.DEFAULT;
+    cache.purgeAllCaches();
+    assertEquals(0, cache.getCacheEntries().length);
+    
+    NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER, true);
+    assertEquals(0, ints.get(0));
+    
+    NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER, true);
+    assertEquals(0, longs.get(0));
+    
+    NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER, true);
+    assertEquals(0, floats.get(0));
+    
+    NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER, true);
+    assertEquals(0, doubles.get(0));
+    
+    BinaryDocValues binaries = cache.getTerms(ar, "bogusterms", true);
+    BytesRef scratch = binaries.get(0);
+    assertEquals(0, scratch.length);
+    
+    SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
+    assertEquals(-1, sorted.getOrd(0));
+    scratch = sorted.get(0);
+    assertEquals(0, scratch.length);
+    
+    SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
+    sortedSet.setDocument(0);
+    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
+    
+    Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
+    assertFalse(bits.get(0));
+    
+    // check that we cached nothing
+    assertEquals(0, cache.getCacheEntries().length);
+    ir.close();
+    dir.close();
+  }
+
+  // Make sure that the use of GrowableWriter doesn't prevent using the full long range
+  public void testLongFieldCache() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
+    cfg.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
+    Document doc = new Document();
+    LongPoint field = new LongPoint("f", 0L);
+    StoredField field2 = new StoredField("f", 0L);
+    doc.add(field);
+    doc.add(field2);
+    final long[] values = new long[TestUtil.nextInt(random(), 1, 10)];
+    for (int i = 0; i < values.length; ++i) {
+      final long v;
+      switch (random().nextInt(10)) {
+        case 0:
+          v = Long.MIN_VALUE;
+          break;
+        case 1:
+          v = 0;
+          break;
+        case 2:
+          v = Long.MAX_VALUE;
+          break;
+        default:
+          v = TestUtil.nextLong(random(), -10, 10);
+          break;
+      }
+      values[i] = v;
+      if (v == 0 && random().nextBoolean()) {
+        // missing
+        iw.addDocument(new Document());
+      } else {
+        field.setLongValue(v);
+        field2.setLongValue(v);
+        iw.addDocument(doc);
+      }
+    }
+    iw.forceMerge(1);
+    final DirectoryReader reader = iw.getReader();
+    final NumericDocValues longs = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LONG_POINT_PARSER, false);
+    for (int i = 0; i < values.length; ++i) {
+      assertEquals(values[i], longs.get(i));
+    }
+    reader.close();
+    iw.close();
+    dir.close();
+  }
+
+  // Make sure that the use of GrowableWriter doesn't prevent using the full int range
+  public void testIntFieldCache() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
+    cfg.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
+    Document doc = new Document();
+    IntPoint field = new IntPoint("f", 0);
+    doc.add(field);
+    final int[] values = new int[TestUtil.nextInt(random(), 1, 10)];
+    for (int i = 0; i < values.length; ++i) {
+      final int v;
+      switch (random().nextInt(10)) {
+        case 0:
+          v = Integer.MIN_VALUE;
+          break;
+        case 1:
+          v = 0;
+          break;
+        case 2:
+          v = Integer.MAX_VALUE;
+          break;
+        default:
+          v = TestUtil.nextInt(random(), -10, 10);
+          break;
+      }
+      values[i] = v;
+      if (v == 0 && random().nextBoolean()) {
+        // missing
+        iw.addDocument(new Document());
+      } else {
+        field.setIntValue(v);
+        iw.addDocument(doc);
+      }
+    }
+    iw.forceMerge(1);
+    final DirectoryReader reader = iw.getReader();
+    final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.INT_POINT_PARSER, false);
+    for (int i = 0; i < values.length; ++i) {
+      assertEquals(values[i], ints.get(i));
+    }
+    reader.close();
+    iw.close();
+    dir.close();
+  }
+
+}
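
Again as orientation, a minimal sketch (not part of the patch) of the cache access pattern TestFieldCache asserts on: repeated requests for the same reader and field return the cached instance, and getDocsWithField distinguishes missing values from genuine zeros. Here leaf is a placeholder for any LeafReader built as in the tests above:

  NumericDocValues longs =
      FieldCache.DEFAULT.getNumerics(leaf, "theLong", FieldCache.LONG_POINT_PARSER, false);
  Bits docsWithField =
      FieldCache.DEFAULT.getDocsWithField(leaf, "theLong", FieldCache.LONG_POINT_PARSER);
  for (int docID = 0; docID < leaf.maxDoc(); docID++) {
    if (docsWithField.get(docID)) {       // absent docs read as 0 from getNumerics
      long value = longs.get(docID);
    }
  }
  // entries are keyed per segment core; evict them explicitly, as the tests do:
  FieldCache.DEFAULT.purgeByCacheKey(leaf.getCoreCacheKey());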

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5525f429/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheReopen.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheReopen.java b/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheReopen.java
new file mode 100644
index 0000000..18c6420
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheReopen.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.uninverting;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestFieldCacheReopen extends LuceneTestCase {
+  
+  // TODO: make a version of this that tests the same thing with UninvertingReader.wrap()
+  
+  // LUCENE-1579: Ensure that on a reopened reader, any
+  // shared segments reuse the doc values arrays in the
+  // FieldCache
+  public void testFieldCacheReuseAfterReopen() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(
+        dir,
+        newIndexWriterConfig(new MockAnalyzer(random())).
+            setMergePolicy(newLogMergePolicy(10))
+    );
+    Document doc = new Document();
+    doc.add(new IntPoint("number", 17));
+    writer.addDocument(doc);
+    writer.commit();
+  
+    // Open reader1
+    DirectoryReader r = DirectoryReader.open(dir);
+    LeafReader r1 = getOnlyLeafReader(r);
+    final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(r1, "number", FieldCache.INT_POINT_PARSER, false);
+    assertEquals(17, ints.get(0));
+  
+    // Add new segment
+    writer.addDocument(doc);
+    writer.commit();
+  
+    // Reopen reader1 --> reader2
+    DirectoryReader r2 = DirectoryReader.openIfChanged(r);
+    assertNotNull(r2);
+    r.close();
+    LeafReader sub0 = r2.leaves().get(0).reader();
+    final NumericDocValues ints2 = FieldCache.DEFAULT.getNumerics(sub0, "number", FieldCache.INT_POINT_PARSER, false);
+    r2.close();
+    assertSame(ints, ints2);
+  
+    writer.close();
+    dir.close();
+  }
+}
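
The test above relies on DirectoryReader.openIfChanged sharing unchanged
segment readers with the old reader, so FieldCache entries keyed on those
segments carry over across the reopen; that is what the assertSame at the end
verifies. A sketch of the reopen pattern, using only core Lucene 6.x API (the
helper name ReopenExample.reopen is illustrative, not from this patch):

import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;

public class ReopenExample {
  // Returns a current reader, closing the old one only when a newer one exists.
  static DirectoryReader reopen(DirectoryReader old) throws IOException {
    DirectoryReader next = DirectoryReader.openIfChanged(old);
    if (next == null) {
      return old;  // index unchanged; keep using the old reader
    }
    old.close();   // unchanged segments stay open, shared by 'next',
    return next;   // so per-segment FieldCache entries remain valid
  }
}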

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5525f429/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSanityChecker.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSanityChecker.java b/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSanityChecker.java
new file mode 100644
index 0000000..d54d579
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSanityChecker.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.uninverting;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.LegacyDoubleField;
+import org.apache.lucene.document.LegacyFloatField;
+import org.apache.lucene.document.LegacyIntField;
+import org.apache.lucene.document.LegacyLongField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.MultiReader;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.solr.index.SlowCompositeReaderWrapper;
+import org.apache.solr.uninverting.FieldCacheSanityChecker.Insanity;
+import org.apache.solr.uninverting.FieldCacheSanityChecker.InsanityType;
+
+public class TestFieldCacheSanityChecker extends LuceneTestCase {
+
+  protected LeafReader readerA;
+  protected LeafReader readerB;
+  protected LeafReader readerX;
+  protected LeafReader readerAclone;
+  protected Directory dirA, dirB;
+  private static final int NUM_DOCS = 1000;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    dirA = newDirectory();
+    dirB = newDirectory();
+
+    IndexWriter wA = new IndexWriter(dirA, newIndexWriterConfig(new MockAnalyzer(random())));
+    IndexWriter wB = new IndexWriter(dirB, newIndexWriterConfig(new MockAnalyzer(random())));
+
+    long theLong = Long.MAX_VALUE;
+    double theDouble = Double.MAX_VALUE;
+    int theInt = Integer.MAX_VALUE;
+    float theFloat = Float.MAX_VALUE;
+    for (int i = 0; i < NUM_DOCS; i++){
+      Document doc = new Document();
+      doc.add(new LegacyLongField("theLong", theLong--, Field.Store.NO));
+      doc.add(new LegacyDoubleField("theDouble", theDouble--, Field.Store.NO));
+      doc.add(new LegacyIntField("theInt", theInt--, Field.Store.NO));
+      doc.add(new LegacyFloatField("theFloat", theFloat--, Field.Store.NO));
+      if (0 == i % 3) {
+        wA.addDocument(doc);
+      } else {
+        wB.addDocument(doc);
+      }
+    }
+    wA.close();
+    wB.close();
+    DirectoryReader rA = DirectoryReader.open(dirA);
+    // readerAclone wraps rA; readerA wraps a fresh open of the same index
+    readerAclone = SlowCompositeReaderWrapper.wrap(rA);
+    readerA = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dirA));
+    readerB = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dirB));
+    readerX = SlowCompositeReaderWrapper.wrap(new MultiReader(readerA, readerB));
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    readerA.close();
+    readerAclone.close();
+    readerB.close();
+    readerX.close();
+    dirA.close();
+    dirB.close();
+    super.tearDown();
+  }
+
+  public void testSanity() throws IOException {
+    FieldCache cache = FieldCache.DEFAULT;
+    cache.purgeAllCaches();
+
+    cache.getNumerics(readerA, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false);
+    cache.getNumerics(readerAclone, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false);
+    cache.getNumerics(readerB, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false);
+
+    cache.getNumerics(readerX, "theInt", FieldCache.LEGACY_INT_PARSER, false);
+
+    // // // 
+
+    Insanity[] insanity = 
+      FieldCacheSanityChecker.checkSanity(cache.getCacheEntries());
+    
+    if (0 < insanity.length)
+      dumpArray(getTestClass().getName() + "#" + getTestName() 
+          + " INSANITY", insanity, System.err);
+
+    assertEquals("shouldn't be any cache insanity", 0, insanity.length);
+    cache.purgeAllCaches();
+  }
+
+  public void testInsanity1() throws IOException {
+    FieldCache cache = FieldCache.DEFAULT;
+    cache.purgeAllCaches();
+
+    cache.getNumerics(readerX, "theInt", FieldCache.LEGACY_INT_PARSER, false);
+    cache.getTerms(readerX, "theInt", false);
+
+    // // // 
+
+    Insanity[] insanity = 
+      FieldCacheSanityChecker.checkSanity(cache.getCacheEntries());
+
+    assertEquals("wrong number of cache errors", 1, insanity.length);
+    assertEquals("wrong type of cache error", 
+                 InsanityType.VALUEMISMATCH,
+                 insanity[0].getType());
+    assertEquals("wrong number of entries in cache error", 2,
+                 insanity[0].getCacheEntries().length);
+
+    // we expect bad things, don't let tearDown complain about them
+    cache.purgeAllCaches();
+  }
+
+  public void testInsanity2() throws IOException {
+    FieldCache cache = FieldCache.DEFAULT;
+    cache.purgeAllCaches();
+
+    cache.getTerms(readerA, "theInt", false);
+    cache.getTerms(readerB, "theInt", false);
+    cache.getTerms(readerX, "theInt", false);
+
+
+    // // // 
+
+    Insanity[] insanity = 
+      FieldCacheSanityChecker.checkSanity(cache.getCacheEntries());
+    
+    assertEquals("wrong number of cache errors", 1, insanity.length);
+    assertEquals("wrong type of cache error", 
+                 InsanityType.SUBREADER,
+                 insanity[0].getType());
+    assertEquals("wrong number of entries in cache error", 3,
+                 insanity[0].getCacheEntries().length);
+
+    // we expect bad things, don't let tearDown complain about them
+    cache.purgeAllCaches();
+  }
+
+}
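
For reference, the checker exercised above can be run over the global cache at
any time, outside of tests. A minimal sketch using the API shown in this patch
(the class name SanityCheckExample is illustrative only):

import org.apache.solr.uninverting.FieldCache;
import org.apache.solr.uninverting.FieldCacheSanityChecker;
import org.apache.solr.uninverting.FieldCacheSanityChecker.Insanity;

public class SanityCheckExample {
  public static void main(String[] args) {
    // Check every entry currently held by the global cache.
    Insanity[] insanity =
        FieldCacheSanityChecker.checkSanity(FieldCache.DEFAULT.getCacheEntries());
    for (Insanity i : insanity) {
      // VALUEMISMATCH: one reader/field cached under conflicting value types;
      // SUBREADER: one field cached for both a composite reader and its leaves.
      System.err.println(i);
    }
  }
}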

