lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r1546675 - in /lucene/dev/trunk/solr: CHANGES.txt core/src/java/org/apache/solr/request/DocValuesFacets.java core/src/java/org/apache/solr/request/SimpleFacets.java core/src/java/org/apache/solr/search/BitDocSet.java
Date Sat, 30 Nov 2013 01:32:27 GMT
Author: rmuir
Date: Sat Nov 30 01:32:26 2013
New Revision: 1546675

URL: http://svn.apache.org/r1546675
Log:
SOLR-5512: Optimize DocValuesFacets

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/SimpleFacets.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/BitDocSet.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1546675&r1=1546674&r2=1546675&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Sat Nov 30 01:32:26 2013
@@ -174,6 +174,8 @@ Optimizations
 * SOLR-5189: Solr 4.x Web UI Log Viewer does not display 'date' column from 
   logs (steffkes)
 
+* SOLR-5512: Optimize DocValuesFacets. (Robert Muir)
+
 Other Changes
 ---------------------
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java?rev=1546675&r1=1546674&r2=1546675&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java Sat Nov 30 01:32:26 2013
@@ -232,9 +232,20 @@ public class DocValuesFacets {
     return res;
   }
   
-  /** accumulates per-segment single-valued facet counts, mapping to global ordinal space */
-  // specialized since the single-valued case is different
+  /** accumulates per-segment single-valued facet counts */
   static void accumSingle(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
+    if (startTermIndex == -1 && (map == null || si.getValueCount() < disi.cost()*10)) {
+      // no prefixing, not too many unique values wrt matching docs (lucene/facets heuristic):
+      //   collect separately per-segment, then map to global ords
+      accumSingleSeg(counts, si, disi, subIndex, map);
+    } else {
+      // otherwise: do collect+map on the fly
+      accumSingleGeneric(counts, startTermIndex, si, disi, subIndex, map);
+    }
+  }
+  
+  /** accumulates per-segment single-valued facet counts, mapping to global ordinal space on-the-fly */
+  static void accumSingleGeneric(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
     int doc;
     while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
       int term = si.getOrd(doc);
@@ -246,8 +257,41 @@ public class DocValuesFacets {
     }
   }
   
-  /** accumulates per-segment multi-valued facet counts, mapping to global ordinal space */
+  /** "typical" single-valued faceting: not too many unique values, no prefixing. maps to global ordinals as a separate step */
+  static void accumSingleSeg(int counts[], SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
+    // First count in seg-ord space:
+    final int segCounts[];
+    if (map == null) {
+      segCounts = counts;
+    } else {
+      segCounts = new int[1+si.getValueCount()];
+    }
+    
+    int doc;
+    while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+      segCounts[1+si.getOrd(doc)]++;
+    }
+    
+    // migrate to global ords (if necessary)
+    if (map != null) {
+      migrateGlobal(counts, segCounts, subIndex, map);
+    }
+  }
+  
+  /** accumulates per-segment multi-valued facet counts */
   static void accumMulti(int counts[], int startTermIndex, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
+    if (startTermIndex == -1 && (map == null || si.getValueCount() < disi.cost()*10)) {
+      // no prefixing, not too many unique values wrt matching docs (lucene/facets heuristic):
+      //   collect separately per-segment, then map to global ords
+      accumMultiSeg(counts, si, disi, subIndex, map);
+    } else {
+      // otherwise: do collect+map on the fly
+      accumMultiGeneric(counts, startTermIndex, si, disi, subIndex, map);
+    }
+  }
+    
+  /** accumulates per-segment multi-valued facet counts, mapping to global ordinal space on-the-fly */
+  static void accumMultiGeneric(int counts[], int startTermIndex, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
     int doc;
     while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
       si.setDocument(doc);
@@ -269,4 +313,47 @@ public class DocValuesFacets {
       } while ((term = (int) si.nextOrd()) >= 0);
     }
   }
+  
+  /** "typical" multi-valued faceting: not too many unique values, no prefixing. maps to global ordinals as a separate step */
+  static void accumMultiSeg(int counts[], SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
+    // First count in seg-ord space:
+    final int segCounts[];
+    if (map == null) {
+      segCounts = counts;
+    } else {
+      segCounts = new int[1+(int)si.getValueCount()];
+    }
+    
+    int doc;
+    while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+      si.setDocument(doc);
+      int term = (int) si.nextOrd();
+      if (term < 0) {
+        counts[0]++; // missing
+      } else {
+        do {
+          segCounts[1+term]++;
+        } while ((term = (int)si.nextOrd()) >= 0);
+      }
+    }
+    
+    // migrate to global ords (if necessary)
+    if (map != null) {
+      migrateGlobal(counts, segCounts, subIndex, map);
+    }
+  }
+  
+  /** folds counts in segment ordinal space (segCounts) into global ordinal space (counts) */
+  static void migrateGlobal(int counts[], int segCounts[], int subIndex, OrdinalMap map) {
+    // missing count
+    counts[0] += segCounts[0];
+    
+    // migrate actual ordinals
+    for (int ord = 1; ord < segCounts.length; ord++) {
+      int count = segCounts[ord];
+      if (count != 0) {
+        counts[1+(int) map.getGlobalOrd(subIndex, ord-1)] += count;
+      }
+    }
+  }
 }

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/SimpleFacets.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/SimpleFacets.java?rev=1546675&r1=1546674&r2=1546675&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/SimpleFacets.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/SimpleFacets.java Sat Nov 30 01:32:26 2013
@@ -566,7 +566,7 @@ public class SimpleFacets {
               throw se;
             } catch (Exception e) {
               throw new SolrException(ErrorCode.SERVER_ERROR,
-                                      "Exception during facet.field: " + workerFacetValue, e.getCause());
+                                      "Exception during facet.field: " + workerFacetValue, e);
             } finally {
               semaphore.release();
             }

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/BitDocSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/BitDocSet.java?rev=1546675&r1=1546674&r2=1546675&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/BitDocSet.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/BitDocSet.java Sat Nov 30 01:32:26 2013
@@ -296,11 +296,11 @@ public class BitDocSet extends DocSetBas
               @Override
               public long cost() {
                 // we don't want to actually compute cardinality, but
-                // if its already been computed, we use it
+                // if its already been computed, we use it (pro-rated for the segment)
                 if (size != -1) {
-                  return size;
+                  return (long)(size * ((OpenBitSet.bits2words(maxDoc)<<6) / (float)bs.capacity()));
                 } else {
-                  return bs.capacity();
+                  return maxDoc;
                 }
               }
             };



Mime
View raw message