lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nkn...@apache.org
Subject lucene-solr git commit: LUCENE-7019: add two-phase iteration to GeoPointTermQueryConstantScoreWrapper
Date Tue, 09 Feb 2016 20:38:45 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/branch_5x 594145f2d -> b92ccc01f


LUCENE-7019: add two-phase iteration to GeoPointTermQueryConstantScoreWrapper


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/b92ccc01
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/b92ccc01
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/b92ccc01

Branch: refs/heads/branch_5x
Commit: b92ccc01f6daa43a2afb464c9112d53cbba9cc00
Parents: 594145f
Author: nknize <nknize@apache.org>
Authored: Tue Feb 9 14:13:56 2016 -0600
Committer: nknize <nknize@apache.org>
Committed: Tue Feb 9 14:38:16 2016 -0600

----------------------------------------------------------------------
 lucene/CHANGES.txt                              | 10 +++
 .../GeoPointTermQueryConstantScoreWrapper.java  | 88 +++++++++++---------
 2 files changed, 60 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b92ccc01/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index f1c9968..be90153 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -174,6 +174,16 @@ Other
   TestSortingMergePolicy now extend it, TestUpgradeIndexMergePolicy added)
   (Christine Poerschke)
 
+======================= Lucene 5.4.2 =======================
+
+Bug Fixes
+
+* LUCENE-7018: Fix GeoPointTermQueryConstantScoreWrapper to add document on
+  first GeoPointField match. (Nick Knize)
+
+* LUCENE-7019: add two-phase iteration to GeoPointTermQueryConstantScoreWrapper.
+  (Robert Muir via Nick Knize)
+
 ======================= Lucene 5.4.1 =======================
 
 Bug Fixes

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b92ccc01/lucene/spatial/src/java/org/apache/lucene/spatial/search/GeoPointTermQueryConstantScoreWrapper.java
----------------------------------------------------------------------
diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/search/GeoPointTermQueryConstantScoreWrapper.java b/lucene/spatial/src/java/org/apache/lucene/spatial/search/GeoPointTermQueryConstantScoreWrapper.java
index 1097add..ae0ef14 100644
--- a/lucene/spatial/src/java/org/apache/lucene/spatial/search/GeoPointTermQueryConstantScoreWrapper.java
+++ b/lucene/spatial/src/java/org/apache/lucene/spatial/search/GeoPointTermQueryConstantScoreWrapper.java
@@ -23,7 +23,6 @@ import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SortedNumericDocValues;
 import org.apache.lucene.index.Terms;
-import org.apache.lucene.search.BulkScorer;
 import org.apache.lucene.search.ConstantScoreScorer;
 import org.apache.lucene.search.ConstantScoreWeight;
 import org.apache.lucene.search.DocIdSet;
@@ -31,8 +30,12 @@ import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TwoPhaseIterator;
 import org.apache.lucene.search.Weight;
+import org.apache.lucene.util.BitSet;
 import org.apache.lucene.util.DocIdSetBuilder;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.SparseFixedBitSet;
 
 import static org.apache.lucene.spatial.util.GeoEncodingUtils.mortonUnhashLat;
 import static org.apache.lucene.spatial.util.GeoEncodingUtils.mortonUnhashLon;
@@ -74,67 +77,76 @@ final class GeoPointTermQueryConstantScoreWrapper <Q extends GeoPointMultiTermQu
   public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
     return new ConstantScoreWeight(this) {
 
-      private DocIdSet getDocIDs(LeafReaderContext context) throws IOException {
+      @Override
+      public Scorer scorer(LeafReaderContext context) throws IOException {
         final Terms terms = context.reader().terms(query.getField());
         if (terms == null) {
-          return DocIdSet.EMPTY;
+          return null;
         }
 
         final GeoPointTermsEnum termsEnum = (GeoPointTermsEnum)(query.getTermsEnum(terms, null));
         assert termsEnum != null;
 
         LeafReader reader = context.reader();
+        // approximation (postfiltering has not yet been applied)
         DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc());
+        // subset of documents that need no postfiltering, this is purely an optimization
+        final BitSet preApproved;
+        // dumb heuristic: if the field is really sparse, use a sparse impl
+        if (terms.getDocCount() * 100L < reader.maxDoc()) {
+          preApproved = new SparseFixedBitSet(reader.maxDoc());
+        } else {
+          preApproved = new FixedBitSet(reader.maxDoc());
+        }
         PostingsEnum docs = null;
-        SortedNumericDocValues sdv = reader.getSortedNumericDocValues(query.getField());
 
         while (termsEnum.next() != null) {
           docs = termsEnum.postings(docs, PostingsEnum.NONE);
-          // boundary terms need post filtering by
+          // boundary terms need post filtering
           if (termsEnum.boundaryTerm()) {
-            int docId = docs.nextDoc();
-            long hash;
-            do {
-              sdv.setDocument(docId);
-              for (int i=0; i<sdv.count(); ++i) {
-                hash = sdv.valueAt(i);
-                if (termsEnum.postFilter(mortonUnhashLon(hash), mortonUnhashLat(hash))) {
-                  builder.add(docId);
-                  break;
-                }
-              }
-            } while ((docId = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS);
-          } else {
             builder.add(docs);
+          } else {
+            int docId;
+            while ((docId = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+              builder.add(docId);
+              preApproved.set(docId);
+            }
           }
         }
 
-        return builder.build();
-      }
-
-      private Scorer scorer(DocIdSet set) throws IOException {
-        if (set == null) {
-          return null;
-        }
+        DocIdSet set = builder.build();
         final DocIdSetIterator disi = set.iterator();
         if (disi == null) {
           return null;
         }
-        return new ConstantScoreScorer(this, score(), disi);
-      }
 
-      @Override
-      public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
-        final Scorer scorer = scorer(getDocIDs(context));
-        if (scorer == null) {
-          return null;
-        }
-        return new DefaultBulkScorer(scorer);
-      }
+        // return two-phase iterator using docvalues to postfilter candidates
+        final SortedNumericDocValues sdv = reader.getSortedNumericDocValues(query.getField());
+        TwoPhaseIterator iterator = new TwoPhaseIterator(disi) {
+          @Override
+          public boolean matches() throws IOException {
+            int docId = disi.docID();
+            if (preApproved.get(docId)) {
+              return true;
+            } else {
+              sdv.setDocument(docId);
+              int count = sdv.count();
+              for (int i = 0; i < count; i++) {
+                long hash = sdv.valueAt(i);
+                if (termsEnum.postFilter(mortonUnhashLon(hash), mortonUnhashLat(hash))) {
+                  return true;
+                }
+              }
+              return false;
+            }
+          }
 
-      @Override
-      public Scorer scorer(LeafReaderContext context) throws IOException {
-        return scorer(getDocIDs(context));
+          @Override
+          public float matchCost() {
+            return 20; // TODO: make this fancier
+          }
+        };
+        return new ConstantScoreScorer(this, score(), iterator);
       }
     };
   }


Mime
View raw message