incubator-blur-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From amccu...@apache.org
Subject git commit: Massive performance improvement for faceted queries. Before this optimization, the query would run slower as it got fewer hits. Now the scorer will advance over the facet as fast as possible.
Date Wed, 19 Mar 2014 01:19:02 GMT
Repository: incubator-blur
Updated Branches:
  refs/heads/apache-blur-0.2 86632167c -> 444cd4adf


Massive performance improvement for faceted queries.  Before this optimization, the query
would run slower as it got fewer hits.  Now the scorer will advance over the facet as fast
as possible.


Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/commit/444cd4ad
Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/444cd4ad
Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/444cd4ad

Branch: refs/heads/apache-blur-0.2
Commit: 444cd4adffdcf67829bae9af27b4dbbadf8e20a3
Parents: 8663216
Author: Aaron McCurry <amccurry@gmail.com>
Authored: Tue Mar 18 21:18:58 2014 -0400
Committer: Aaron McCurry <amccurry@gmail.com>
Committed: Tue Mar 18 21:18:58 2014 -0400

----------------------------------------------------------------------
 .../blur/lucene/search/FacetExecutor.java       | 16 +++++-
 .../blur/lucene/search/FacetQueryTest.java      | 57 ++++++++++++++++++--
 2 files changed, 67 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/444cd4ad/blur-query/src/main/java/org/apache/blur/lucene/search/FacetExecutor.java
----------------------------------------------------------------------
diff --git a/blur-query/src/main/java/org/apache/blur/lucene/search/FacetExecutor.java b/blur-query/src/main/java/org/apache/blur/lucene/search/FacetExecutor.java
index 7359857..ed2abc7 100644
--- a/blur-query/src/main/java/org/apache/blur/lucene/search/FacetExecutor.java
+++ b/blur-query/src/main/java/org/apache/blur/lucene/search/FacetExecutor.java
@@ -39,6 +39,7 @@ import org.apache.blur.trace.Tracer;
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.util.OpenBitSet;
 
@@ -59,6 +60,7 @@ public class FacetExecutor {
 
     int _hits;
     final OpenBitSet _bitSet;
+    Scorer _scorer;
 
     SimpleCollector(OpenBitSet bitSet) {
       _bitSet = bitSet;
@@ -68,12 +70,23 @@ public class FacetExecutor {
     public void collect(int doc) throws IOException {
       if (_bitSet.fastGet(doc)) {
         _hits++;
+      } else {
+        int nextSetBit = _bitSet.nextSetBit(doc);
+        if (nextSetBit < 0) {
+          // Move to the end of the scorer.
+          _scorer.advance(DocIdSetIterator.NO_MORE_DOCS);
+        } else {
+          int advance = _scorer.advance(nextSetBit);
+          if (_bitSet.fastGet(advance)) {
+            _hits++;
+          }
+        }
       }
     }
 
     @Override
     public void setScorer(Scorer scorer) throws IOException {
-
+      _scorer = scorer;
     }
 
     @Override
@@ -154,7 +167,6 @@ public class FacetExecutor {
       for (int i = 0; i < _scorers.length; i++) {
         ids.put(i);
       }
-
     }
 
     private void runFacet(AtomicLongArray counts, SimpleCollector col, int i) throws IOException {

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/444cd4ad/blur-query/src/test/java/org/apache/blur/lucene/search/FacetQueryTest.java
----------------------------------------------------------------------
diff --git a/blur-query/src/test/java/org/apache/blur/lucene/search/FacetQueryTest.java b/blur-query/src/test/java/org/apache/blur/lucene/search/FacetQueryTest.java
index e098e62..c8aa393 100644
--- a/blur-query/src/test/java/org/apache/blur/lucene/search/FacetQueryTest.java
+++ b/blur-query/src/test/java/org/apache/blur/lucene/search/FacetQueryTest.java
@@ -53,8 +53,11 @@ import org.junit.Test;
 
 public class FacetQueryTest {
 
+  private static final boolean TRACE = false;
+
   @Test
   public void testFacetQueryNoSuper() throws IOException, InterruptedException {
+    System.out.println("testFacetQueryNoSuper");
     IndexReader reader = createIndex(10, 0, true);
     BooleanQuery bq = new BooleanQuery();
     bq.add(new TermQuery(new Term("f1", "value")), Occur.SHOULD);
@@ -85,7 +88,8 @@ public class FacetQueryTest {
   }
 
   @Test
-  public void testFacetQueryPerformance() throws IOException, InterruptedException {
+  public void testFacetQueryPerformance1() throws IOException, InterruptedException {
+    System.out.println("testFacetQueryPerformance1");
     BlurConfiguration configuration = new BlurConfiguration();
     Trace.setStorage(new LogTraceStorage(configuration));
     int facetCount = 200;
@@ -106,11 +110,11 @@ public class FacetQueryTest {
         FacetQuery facetQuery = new FacetQuery(new TermQuery(new Term("f1", "value")), facets, facetExecutor);
         long t1 = System.nanoTime();
         indexSearcher.search(facetQuery, 10);
-        if (t == 4) {
+        if (t == 4 && TRACE) {
           Trace.setupTrace("unittest");
         }
         facetExecutor.processFacets(executor);
-        if (t == 4) {
+        if (t == 4 && TRACE) {
           Trace.tearDownTrace();
         }
         executor.shutdown();
@@ -128,7 +132,52 @@ public class FacetQueryTest {
   }
 
   @Test
+  public void testFacetQueryPerformance2() throws IOException, InterruptedException {
+    System.out.println("testFacetQueryPerformance2");
+    BlurConfiguration configuration = new BlurConfiguration();
+    Trace.setStorage(new LogTraceStorage(configuration));
+    int facetCount = 200;
+    int docCount = 1000000;
+    IndexReader reader = createIndex(docCount, facetCount, false);
+
+    Query[] facets = new Query[facetCount];
+    for (int i = 0; i < facetCount; i++) {
+      facets[i] = new TermQuery(new Term("facet" + i, "value"));
+    }
+
+    ExecutorService executor = null;
+    try {
+      for (int t = 0; t < 5; t++) {
+        executor = getThreadPool(20);
+        IndexSearcher indexSearcher = new IndexSearcher(reader, executor);
+        FacetExecutor facetExecutor = new FacetExecutor(facets.length);
+        FacetQuery facetQuery = new FacetQuery(new TermQuery(new Term("f2", "v45")), facets, facetExecutor);
+        long t1 = System.nanoTime();
+        indexSearcher.search(facetQuery, 10);
+        if (t == 4 && TRACE) {
+          Trace.setupTrace("unittest");
+        }
+        facetExecutor.processFacets(executor);
+        if (t == 4 && TRACE) {
+          Trace.tearDownTrace();
+        }
+        executor.shutdown();
+        executor.awaitTermination(10, TimeUnit.SECONDS);
+        long t2 = System.nanoTime();
+        System.out.println((t2 - t1) / 1000000.0);
+
+        for (int i = 0; i < facetExecutor.length(); i++) {
+          assertEquals(1, facetExecutor.get(i));
+        }
+      }
+    } finally {
+      executor.shutdownNow();
+    }
+  }
+
+  @Test
   public void testFacetQueryPerformanceWithMins() throws IOException, InterruptedException {
+    System.out.println("testFacetQueryPerformanceWithMins");
     int facetCount = 200;
     int docCount = 1000000;
     IndexReader reader = createIndex(docCount, facetCount, false);
@@ -160,7 +209,7 @@ public class FacetQueryTest {
         System.out.println((t2 - t1) / 1000000.0);
 
         for (int i = 0; i < facetExecutor.length(); i++) {
-          
+
           assertTrue(facetExecutor.get(i) >= min);
         }
       }


Mime
View raw message