lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kris...@apache.org
Subject [16/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7519: add optimized implementations for browse-only facets
Date Thu, 27 Oct 2016 20:09:20 GMT
LUCENE-7519: add optimized implementations for browse-only facets


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/0782b095
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/0782b095
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/0782b095

Branch: refs/heads/jira/solr-8593
Commit: 0782b09571fc5ac3e92b566f9abc047b2bd7966c
Parents: b7aa582
Author: Mike McCandless <mikemccand@apache.org>
Authored: Tue Oct 25 06:22:23 2016 -0400
Committer: Mike McCandless <mikemccand@apache.org>
Committed: Tue Oct 25 06:22:23 2016 -0400

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   3 +
 .../DefaultSortedSetDocValuesReaderState.java   |   3 +-
 .../SortedSetDocValuesFacetCounts.java          | 124 +++++++++++++++----
 .../facet/taxonomy/FastTaxonomyFacetCounts.java |  49 ++++++++
 .../lucene/facet/taxonomy/TaxonomyFacets.java   |   4 +-
 .../sortedset/TestSortedSetDocValuesFacets.java |  25 ++--
 .../facet/taxonomy/TestTaxonomyFacetCounts.java |  84 +++++--------
 7 files changed, 202 insertions(+), 90 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0782b095/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 954137f..d574a8a 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -45,6 +45,9 @@ Optimizations
   that have a facet value, so sparse faceting works as expected
   (Adrien Grand via Mike McCandless)
 
+* LUCENE-7519: Add optimized APIs to compute browse-only top-level
+  facets (Mike McCandless)
+
 Other
 
 * LUCENE-7328: Remove LegacyNumericEncoding from GeoPointField. (Nick Knize)

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0782b095/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java
----------------------------------------------------------------------
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java
b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java
index 7bbe94a..b959d25 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java
@@ -36,7 +36,8 @@ import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.util.BytesRef;
 
 /**
- * Default implementation of {@link SortedSetDocValuesFacetCounts}
+ * Default implementation of {@link SortedSetDocValuesReaderState}. You must ensure the original
+ * {@link IndexReader} passed to the constructor remains open for as long as you use this class!
  */
 public class DefaultSortedSetDocValuesReaderState extends SortedSetDocValuesReaderState {
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0782b095/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java
----------------------------------------------------------------------
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java
b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java
index 4fff6a6..9ba8547 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java
@@ -18,6 +18,7 @@ package org.apache.lucene.facet.sortedset;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
@@ -33,11 +34,15 @@ import org.apache.lucene.facet.TopOrdAndIntQueue;
 import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState.OrdRange;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
 import org.apache.lucene.index.MultiDocValues;
 import org.apache.lucene.index.ReaderUtil;
 import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.search.ConjunctionDISI;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LongValues;
 
@@ -77,6 +82,17 @@ public class SortedSetDocValuesFacetCounts extends Facets {
     count(hits.getMatchingDocs());
   }
 
+  /** Returns all facet counts, same result as searching on {@link MatchAllDocsQuery} but faster. */
+  public SortedSetDocValuesFacetCounts(SortedSetDocValuesReaderState state)
+      throws IOException {
+    this.state = state;
+    this.field = state.getField();
+    dv = state.getDocValues();    
+    counts = new int[state.getSize()];
+    //System.out.println("field=" + field);
+    countAll();
+  }
+
   @Override
   public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException
{
     if (topN <= 0) {
@@ -176,7 +192,8 @@ public class SortedSetDocValuesFacetCounts extends Facets {
         continue;
       }
 
-      DocIdSetIterator docs = hits.bits.iterator();
+      DocIdSetIterator it = ConjunctionDISI.intersectIterators(Arrays.asList(
+                                  hits.bits.iterator(), segValues));
 
       // TODO: yet another option is to count all segs
       // first, only in seg-ord space, and then do a
@@ -196,16 +213,12 @@ public class SortedSetDocValuesFacetCounts extends Facets {
         if (hits.totalHits < numSegOrds/10) {
           //System.out.println("    remap as-we-go");
           // Remap every ord to global ord as we iterate:
-          int doc;
-          while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-            //System.out.println("    doc=" + doc);
-            if (segValues.advanceExact(doc)) {
-              int term = (int) segValues.nextOrd();
-              while (term != SortedSetDocValues.NO_MORE_ORDS) {
-                //System.out.println("      segOrd=" + segOrd + " ord=" + term + " globalOrd=" + ordinalMap.getGlobalOrd(segOrd, term));
-                counts[(int) ordMap.get(term)]++;
-                term = (int) segValues.nextOrd();
-              }
+          for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc())
{
+            int term = (int) segValues.nextOrd();
+            while (term != SortedSetDocValues.NO_MORE_ORDS) {
+              //System.out.println("      segOrd=" + segOrd + " ord=" + term + " globalOrd=" + ordinalMap.getGlobalOrd(segOrd, term));
+              counts[(int) ordMap.get(term)]++;
+              term = (int) segValues.nextOrd();
             }
           }
         } else {
@@ -213,16 +226,12 @@ public class SortedSetDocValuesFacetCounts extends Facets {
 
           // First count in seg-ord space:
           final int[] segCounts = new int[numSegOrds];
-          int doc;
-          while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-            //System.out.println("    doc=" + doc);
-            if (segValues.advanceExact(doc)) {
-              int term = (int) segValues.nextOrd();
-              while (term != SortedSetDocValues.NO_MORE_ORDS) {
-                //System.out.println("      ord=" + term);
-                segCounts[term]++;
-                term = (int) segValues.nextOrd();
-              }
+          for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc())
{
+            int term = (int) segValues.nextOrd();
+            while (term != SortedSetDocValues.NO_MORE_ORDS) {
+              //System.out.println("      ord=" + term);
+              segCounts[term]++;
+              term = (int) segValues.nextOrd();
             }
           }
 
@@ -238,9 +247,76 @@ public class SortedSetDocValuesFacetCounts extends Facets {
       } else {
         // No ord mapping (e.g., single segment index):
         // just aggregate directly into counts:
-        int doc;
-        while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-          if (segValues.advanceExact(doc)) {
+        for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc())
{
+          int term = (int) segValues.nextOrd();
+          while (term != SortedSetDocValues.NO_MORE_ORDS) {
+            counts[term]++;
+            term = (int) segValues.nextOrd();
+          }
+        }
+      }
+    }
+  }
+
+  /** Does all the "real work" of tallying up the counts. */
+  private final void countAll() throws IOException {
+    //System.out.println("ssdv count");
+
+    MultiDocValues.OrdinalMap ordinalMap;
+
+    // TODO: is this right?  really, we need a way to
+    // verify that this ordinalMap "matches" the leaves in
+    // matchingDocs...
+    if (dv instanceof MultiDocValues.MultiSortedSetDocValues) {
+      ordinalMap = ((MultiSortedSetDocValues) dv).mapping;
+    } else {
+      ordinalMap = null;
+    }
+    
+    IndexReader origReader = state.getOrigReader();
+
+    for(LeafReaderContext context : origReader.leaves()) {
+
+      LeafReader reader = context.reader();
+      
+      SortedSetDocValues segValues = reader.getSortedSetDocValues(field);
+      if (segValues == null) {
+        continue;
+      }
+
+      Bits liveDocs = reader.getLiveDocs();
+
+      if (ordinalMap != null) {
+        final LongValues ordMap = ordinalMap.getGlobalOrds(context.ord);
+
+        int numSegOrds = (int) segValues.getValueCount();
+
+        // First count in seg-ord space:
+        final int[] segCounts = new int[numSegOrds];
+        int docID;
+        while ((docID = segValues.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+          if (liveDocs == null || liveDocs.get(docID)) {
+            int term = (int) segValues.nextOrd();
+            while (term != SortedSetDocValues.NO_MORE_ORDS) {
+              segCounts[term]++;
+              term = (int) segValues.nextOrd();
+            }
+          }
+        }
+
+        // Then, migrate to global ords:
+        for(int ord=0;ord<numSegOrds;ord++) {
+          int count = segCounts[ord];
+          if (count != 0) {
+            counts[(int) ordMap.get(ord)] += count;
+          }
+        }
+      } else {
+        // No ord mapping (e.g., single segment index):
+        // just aggregate directly into counts:
+        int docID;
+        while ((docID = segValues.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+          if (liveDocs == null || liveDocs.get(docID)) {
             int term = (int) segValues.nextOrd();
             while (term != SortedSetDocValues.NO_MORE_ORDS) {
               counts[term]++;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0782b095/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java
----------------------------------------------------------------------
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java
b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java
index ef96073..d560d40 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java
@@ -24,8 +24,12 @@ import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
 import org.apache.lucene.facet.FacetsCollector;
 import org.apache.lucene.facet.FacetsConfig;
 import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.ConjunctionDISI;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 
 /** Computes facets counts, assuming the default encoding
@@ -50,6 +54,16 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
     count(fc.getMatchingDocs());
   }
 
+  /** Create {@code FastTaxonomyFacetCounts}, using the
+   *  specified {@code indexFieldName} for ordinals, and
+   *  counting all non-deleted documents in the index.  This is 
+   *  the same result as searching on {@link MatchAllDocsQuery},
+   *  but faster. */
+  public FastTaxonomyFacetCounts(String indexFieldName, IndexReader reader, TaxonomyReader
taxoReader, FacetsConfig config) throws IOException {
+    super(indexFieldName, taxoReader, config);
+    countAll(reader);
+  }
+
   private final void count(List<MatchingDocs> matchingDocs) throws IOException {
     for(MatchingDocs hits : matchingDocs) {
       BinaryDocValues dv = hits.context.reader().getBinaryDocValues(indexFieldName);
@@ -82,4 +96,39 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
 
     rollup();
   }
+
+  private final void countAll(IndexReader reader) throws IOException {
+    for(LeafReaderContext context : reader.leaves()) {
+      BinaryDocValues dv = context.reader().getBinaryDocValues(indexFieldName);
+      if (dv == null) { // this reader does not have DocValues for the requested category list
+        continue;
+      }
+
+      Bits liveDocs = context.reader().getLiveDocs();
+
+      for (int doc = dv.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dv.nextDoc())
{
+        if (liveDocs != null && liveDocs.get(doc) == false) {
+          continue;
+        }
+        final BytesRef bytesRef = dv.binaryValue();
+        byte[] bytes = bytesRef.bytes;
+        int end = bytesRef.offset + bytesRef.length;
+        int ord = 0;
+        int offset = bytesRef.offset;
+        int prev = 0;
+        while (offset < end) {
+          byte b = bytes[offset++];
+          if (b >= 0) {
+            prev = ord = ((ord << 7) | b) + prev;
+            ++values[ord];
+            ord = 0;
+          } else {
+            ord = (ord << 7) | (b & 0x7F);
+          }
+        }
+      }
+    }
+
+    rollup();
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0782b095/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java
----------------------------------------------------------------------
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java
index d111b44..e1903d1 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java
@@ -76,7 +76,7 @@ public abstract class TaxonomyFacets extends Facets {
   protected FacetsConfig.DimConfig verifyDim(String dim) {
     FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
     if (!dimConfig.indexFieldName.equals(indexFieldName)) {
-      throw new IllegalArgumentException("dimension \"" + dim + "\" was not indexed into
field \"" + indexFieldName);
+      throw new IllegalArgumentException("dimension \"" + dim + "\" was not indexed into
field \"" + indexFieldName + "\"");
     }
     return dimConfig;
   }
@@ -102,4 +102,4 @@ public abstract class TaxonomyFacets extends Facets {
     return results;
   }
   
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0782b095/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java
----------------------------------------------------------------------
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java
b/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java
index 60beddd..5aed22b 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java
@@ -16,6 +16,7 @@
  */
 package org.apache.lucene.facet.sortedset;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -72,12 +73,8 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
 
     // Per-top-reader state:
     SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader());
-    
-    FacetsCollector c = new FacetsCollector();
 
-    searcher.search(new MatchAllDocsQuery(), c);
-
-    SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c);
+    SortedSetDocValuesFacetCounts facets = getAllFacets(searcher, state);
 
     assertEquals("dim=a path=[] value=4 childCount=3\n  foo (2)\n  bar (1)\n  zoo (1)\n",
facets.getTopChildren(10, "a").toString());
     assertEquals("dim=b path=[] value=1 childCount=1\n  baz (1)\n", facets.getTopChildren(10,
"b").toString());
@@ -171,9 +168,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
     // Per-top-reader state:
     SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader());
 
-    FacetsCollector c = new FacetsCollector();
-    searcher.search(new MatchAllDocsQuery(), c);    
-    SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c);
+    SortedSetDocValuesFacetCounts facets = getAllFacets(searcher, state);
 
     // Ask for top 10 labels for any dims that have counts:
     List<FacetResult> results = facets.getAllDims(10);
@@ -215,9 +210,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
     // Per-top-reader state:
     SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader());
 
-    FacetsCollector c = new FacetsCollector();
-    searcher.search(new MatchAllDocsQuery(), c);    
-    SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c);
+    SortedSetDocValuesFacetCounts facets = getAllFacets(searcher, state);
 
     // Ask for top 10 labels for any dims that have counts:
     assertEquals("dim=a path=[] value=2 childCount=2\n  foo1 (1)\n  foo2 (1)\n", facets.getTopChildren(10,
"a").toString());
@@ -312,4 +305,14 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
     w.close();
     IOUtils.close(searcher.getIndexReader(), indexDir, taxoDir);
   }
+
+  private static SortedSetDocValuesFacetCounts getAllFacets(IndexSearcher searcher, SortedSetDocValuesReaderState
state) throws IOException {
+    if (random().nextBoolean()) {
+      FacetsCollector c = new FacetsCollector();
+      searcher.search(new MatchAllDocsQuery(), c);    
+      return new SortedSetDocValuesFacetCounts(state, c);
+    } else {
+      return new SortedSetDocValuesFacetCounts(state);
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0782b095/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java
----------------------------------------------------------------------
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java
b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java
index 20bfdb5..3bb480d 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java
@@ -17,6 +17,7 @@
 package org.apache.lucene.facet.taxonomy;
 
 import java.io.ByteArrayOutputStream;
+import java.io.IOException;
 import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -102,16 +103,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
     // NRT open
     TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
 
-    // Aggregate the facet counts:
-    FacetsCollector c = new FacetsCollector();
-
-    // MatchAllDocsQuery is for "browsing" (counts facets
-    // for all non-deleted docs in the index); normally
-    // you'd use a "normal" query, and use MultiCollector to
-    // wrap collecting the "normal" hits and also facets:
-    searcher.search(new MatchAllDocsQuery(), c);
-
-    Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, c);
+    Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, searcher, taxoReader,
config);
 
     // Retrieve & verify results:
     assertEquals("dim=Publish Date path=[] value=5 childCount=3\n  2010 (2)\n  2012 (2)\n
 1999 (1)\n", facets.getTopChildren(10, "Publish Date").toString());
@@ -120,7 +112,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
     // Now user drills down on Publish Date/2010:
     DrillDownQuery q2 = new DrillDownQuery(config);
     q2.add("Publish Date", "2010");
-    c = new FacetsCollector();
+    FacetsCollector c = new FacetsCollector();
     searcher.search(q2, c);
     facets = new FastTaxonomyFacetCounts(taxoReader, config, c);
     assertEquals("dim=Author path=[] value=2 childCount=2\n  Bob (1)\n  Lisa (1)\n", facets.getTopChildren(10,
"Author").toString());
@@ -185,11 +177,8 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
     // NRT open
     TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
 
-    FacetsCollector c = new FacetsCollector();
-    searcher.search(new MatchAllDocsQuery(), c);    
-
-    Facets facets = getTaxonomyFacetCounts(taxoReader, new FacetsConfig(), c);
-
+    Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, searcher, taxoReader,
config);
+    
     // Ask for top 10 labels for any dims that have counts:
     List<FacetResult> results = facets.getAllDims(10);
 
@@ -225,7 +214,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
     TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
 
     FacetsCollector c = new FacetsCollector();
-    searcher.search(new MatchAllDocsQuery(), c);    
+    searcher.search(new MatchAllDocsQuery(), c);
 
     // Uses default $facets field:
     Facets facets;
@@ -301,15 +290,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
     // NRT open
     TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
     
-    // Aggregate the facet counts:
-    FacetsCollector c = new FacetsCollector();
-
-    // MatchAllDocsQuery is for "browsing" (counts facets
-    // for all non-deleted docs in the index); normally
-    // you'd use a "normal" query, and use MultiCollector to
-    // wrap collecting the "normal" hits and also facets:
-    searcher.search(new MatchAllDocsQuery(), c);
-    Facets facets = getTaxonomyFacetCounts(taxoReader, config, c);
+    Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, searcher, taxoReader,
config);
 
     expectThrows(IllegalArgumentException.class, () -> {
       facets.getSpecificValue("a");
@@ -344,10 +325,8 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
     // NRT open
     TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
 
-    FacetsCollector c = new FacetsCollector();
-    searcher.search(new MatchAllDocsQuery(), c);
+    Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, searcher, taxoReader,
config);
     
-    Facets facets = getTaxonomyFacetCounts(taxoReader, config, c);
     assertEquals(1, facets.getSpecificValue("dim", "test\u001Fone"));
     assertEquals(1, facets.getSpecificValue("dim", "test\u001Etwo"));
 
@@ -387,11 +366,8 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
 
     // NRT open
     TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
-
-    FacetsCollector c = new FacetsCollector();
-    searcher.search(new MatchAllDocsQuery(), c);
+    Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, searcher, taxoReader,
config);
     
-    Facets facets = getTaxonomyFacetCounts(taxoReader, config, c);
     assertEquals(1, facets.getTopChildren(10, "dim").value);
     assertEquals(1, facets.getTopChildren(10, "dim2").value);
     assertEquals(1, facets.getTopChildren(10, "dim3").value);
@@ -432,15 +408,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
     // NRT open
     TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
     
-    // Aggregate the facet counts:
-    FacetsCollector c = new FacetsCollector();
-    
-    // MatchAllDocsQuery is for "browsing" (counts facets
-    // for all non-deleted docs in the index); normally
-    // you'd use a "normal" query, and use MultiCollector to
-    // wrap collecting the "normal" hits and also facets:
-    searcher.search(new MatchAllDocsQuery(), c);
-    Facets facets = getTaxonomyFacetCounts(taxoReader, config, c);
+    Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, searcher, taxoReader,
config);
 
     FacetResult result = facets.getTopChildren(Integer.MAX_VALUE, "dim");
     assertEquals(numLabels, result.labelValues.length);
@@ -544,9 +512,8 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
     DirectoryReader r = DirectoryReader.open(iw);
     DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
     
-    FacetsCollector sfc = new FacetsCollector();
-    newSearcher(r).search(new MatchAllDocsQuery(), sfc);
-    Facets facets = getTaxonomyFacetCounts(taxoReader, config, sfc);
+    Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, newSearcher(r), taxoReader,
config);
+    
     for (FacetResult result : facets.getAllDims(10)) {
       assertEquals(r.numDocs(), result.value.intValue());
     }
@@ -572,10 +539,8 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
     DirectoryReader r = DirectoryReader.open(iw);
     DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
 
-    final FacetsCollector sfc = new FacetsCollector();
-    newSearcher(r).search(new MatchAllDocsQuery(), sfc);
-
-    Facets facets = getTaxonomyFacetCounts(taxoReader, config, sfc);
+    Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, newSearcher(r), taxoReader,
config);
+    
     List<FacetResult> res1 = facets.getAllDims(10);
     List<FacetResult> res2 = facets.getAllDims(10);
     assertEquals("calling getFacetResults twice should return the .equals()=true result",
res1, res2);
@@ -601,9 +566,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
     DirectoryReader r = DirectoryReader.open(iw);
     DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
     
-    FacetsCollector sfc = new FacetsCollector();
-    newSearcher(r).search(new MatchAllDocsQuery(), sfc);
-    Facets facets = getTaxonomyFacetCounts(taxoReader, config, sfc);
+    Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, newSearcher(r), taxoReader,
config);
     
     assertEquals(10, facets.getTopChildren(2, "a").childCount);
 
@@ -754,4 +717,21 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
     w.close();
     IOUtils.close(tw, searcher.getIndexReader(), tr, indexDir, taxoDir);
   }
+
+  private static Facets getAllFacets(String indexFieldName, IndexSearcher searcher, TaxonomyReader
taxoReader, FacetsConfig config) throws IOException {
+    if (random().nextBoolean()) {
+      // Aggregate the facet counts:
+      FacetsCollector c = new FacetsCollector();
+
+      // MatchAllDocsQuery is for "browsing" (counts facets
+      // for all non-deleted docs in the index); normally
+      // you'd use a "normal" query, and use MultiCollector to
+      // wrap collecting the "normal" hits and also facets:
+      searcher.search(new MatchAllDocsQuery(), c);
+
+      return new FastTaxonomyFacetCounts(taxoReader, config, c);
+    } else {
+      return new FastTaxonomyFacetCounts(indexFieldName, searcher.getIndexReader(), taxoReader,
config);
+    }
+  }
 }


Mime
View raw message