lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jpou...@apache.org
Subject svn commit: r1629598 - in /lucene/dev/trunk/lucene: CHANGES.txt core/src/java/org/apache/lucene/search/FilteredQuery.java core/src/test/org/apache/lucene/search/TestFilteredQuery.java test-framework/src/java/org/apache/lucene/util/TestUtil.java
Date Mon, 06 Oct 2014 09:19:59 GMT
Author: jpountz
Date: Mon Oct  6 09:19:59 2014
New Revision: 1629598

URL: http://svn.apache.org/r1629598
Log:
LUCENE-5979: Use the cost API to decide on whether to use random-access to intersect queries
and filters.

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestFilteredQuery.java
    lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1629598&r1=1629597&r2=1629598&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Mon Oct  6 09:19:59 2014
@@ -1250,6 +1250,10 @@ New Features
   approximate value of the diameter of the earth at the given latitude.
   (Adrien Grand)
 
+* LUCENE-5979: FilteredQuery uses the cost API to decide on whether to use
+  random-access or leap-frog to intersect the filter with the query.
+  (Adrien Grand)
+
 Build
 
 * LUCENE-5217,LUCENE-5420: Maven config: get dependencies from Ant+Ivy config;

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java?rev=1629598&r1=1629597&r2=1629598&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java Mon Oct  6 09:19:59 2014
@@ -254,12 +254,12 @@ public class FilteredQuery extends Query
    * jumping past the target document. When both land on the same document, it's
    * collected.
    */
-  private static class LeapFrogScorer extends Scorer {
+  private static final class LeapFrogScorer extends Scorer {
     private final DocIdSetIterator secondary;
     private final DocIdSetIterator primary;
     private final Scorer scorer;
-    protected int primaryDoc = -1;
-    protected int secondaryDoc = -1;
+    private int primaryDoc = -1;
+    private int secondaryDoc = -1;
 
     protected LeapFrogScorer(Weight weight, DocIdSetIterator primary, DocIdSetIterator secondary, Scorer scorer) {
       super(weight);
@@ -324,26 +324,6 @@ public class FilteredQuery extends Query
     }
   }
   
-  // TODO once we have way to figure out if we use RA or LeapFrog we can remove this scorer
-  private static final class PrimaryAdvancedLeapFrogScorer extends LeapFrogScorer {
-    private final int firstFilteredDoc;
-
-    protected PrimaryAdvancedLeapFrogScorer(Weight weight, int firstFilteredDoc, DocIdSetIterator filterIter, Scorer other) {
-      super(weight, filterIter, other, other);
-      this.firstFilteredDoc = firstFilteredDoc;
-      this.primaryDoc = firstFilteredDoc; // initialize to prevent and advance call to move it further
-    }
-
-    @Override
-    protected int primaryNext() throws IOException {
-      if (secondaryDoc != -1) {
-        return super.primaryNext();
-      } else {
-        return firstFilteredDoc;
-      }
-    }
-  }
-  
   /** Rewrites the query. If the wrapped is an instance of
    * {@link MatchAllDocsQuery} it returns a {@link ConstantScoreQuery}. Otherwise
    * it returns a new {@code FilteredQuery} wrapping the rewritten query. */
@@ -421,7 +401,7 @@ public class FilteredQuery extends Query
    * A {@link FilterStrategy} that conditionally uses a random access filter if
    * the given {@link DocIdSet} supports random access (returns a non-null value
    * from {@link DocIdSet#bits()}) and
-   * {@link RandomAccessFilterStrategy#useRandomAccess(Bits, int)} returns
+   * {@link RandomAccessFilterStrategy#useRandomAccess(Bits, long)} returns
    * <code>true</code>. Otherwise this strategy falls back to a "zig-zag join" (
    * {@link FilteredQuery#LEAP_FROG_FILTER_FIRST_STRATEGY}) strategy.
    * 
@@ -515,7 +495,7 @@ public class FilteredQuery extends Query
    * A {@link FilterStrategy} that conditionally uses a random access filter if
    * the given {@link DocIdSet} supports random access (returns a non-null value
    * from {@link DocIdSet#bits()}) and
-   * {@link RandomAccessFilterStrategy#useRandomAccess(Bits, int)} returns
+   * {@link RandomAccessFilterStrategy#useRandomAccess(Bits, long)} returns
    * <code>true</code>. Otherwise this strategy falls back to a "zig-zag join" (
    * {@link FilteredQuery#LEAP_FROG_FILTER_FIRST_STRATEGY}) strategy .
    */
@@ -528,25 +508,18 @@ public class FilteredQuery extends Query
         // this means the filter does not accept any documents.
         return null;
       }  
-
-      final int firstFilterDoc = filterIter.nextDoc();
-      if (firstFilterDoc == DocIdSetIterator.NO_MORE_DOCS) {
-        return null;
-      }
       
       final Bits filterAcceptDocs = docIdSet.bits();
       // force if RA is requested
-      final boolean useRandomAccess = filterAcceptDocs != null && useRandomAccess(filterAcceptDocs, firstFilterDoc);
+      final boolean useRandomAccess = filterAcceptDocs != null && useRandomAccess(filterAcceptDocs, filterIter.cost());
       if (useRandomAccess) {
         // if we are using random access, we return the inner scorer, just with other acceptDocs
         return weight.scorer(context, filterAcceptDocs);
       } else {
-        assert firstFilterDoc > -1;
         // we are gonna advance() this scorer, so we set inorder=true/toplevel=false
         // we pass null as acceptDocs, as our filter has already respected acceptDocs, no need to do twice
         final Scorer scorer = weight.scorer(context, null);
-        // TODO once we have way to figure out if we use RA or LeapFrog we can remove this scorer
-        return (scorer == null) ? null : new PrimaryAdvancedLeapFrogScorer(weight, firstFilterDoc, filterIter, scorer);
+        return (scorer == null) ? null : new LeapFrogScorer(weight, filterIter, scorer, scorer);
       }
     }
     
@@ -557,14 +530,14 @@ public class FilteredQuery extends Query
      * However, when the filter is very sparse, it can be faster to execute the query+filter
      * as a conjunction in some cases.
      * 
-     * The default implementation returns <code>true</code> if the first document accepted by the
-     * filter is < 100.
+     * The default implementation returns <code>true</code> if the filter matches more than 1%
+     * of documents
      * 
      * @lucene.internal
      */
-    protected boolean useRandomAccess(Bits bits, int firstFilterDoc) {
-      //TODO once we have a cost API on filters and scorers we should rethink this heuristic
-      return firstFilterDoc < 100;
+    protected boolean useRandomAccess(Bits bits, long filterCost) {
+      // if the filter matches more than 1% of documents, we use random-access
+      return filterCost * 100 > bits.length();
     }
   }
   

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestFilteredQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestFilteredQuery.java?rev=1629598&r1=1629597&r2=1629598&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestFilteredQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestFilteredQuery.java Mon Oct  6 09:19:59 2014
@@ -387,7 +387,7 @@ public class TestFilteredQuery extends L
     if (useRandomAccess) {
       return new FilteredQuery.RandomAccessFilterStrategy() {
         @Override
-        protected boolean useRandomAccess(Bits bits, int firstFilterDoc) {
+        protected boolean useRandomAccess(Bits bits, long filterCost) {
           return true;
         }
       };

Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java?rev=1629598&r1=1629597&r2=1629598&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java Mon Oct  6 09:19:59 2014
@@ -1025,7 +1025,7 @@ public final class TestUtil {
       case 4:
         return new FilteredQuery.RandomAccessFilterStrategy() {
           @Override
-          protected boolean useRandomAccess(Bits bits, int firstFilterDoc) {
+          protected boolean useRandomAccess(Bits bits, long filterCost) {
             return LuceneTestCase.random().nextBoolean();
           }
         };



Mime
View raw message