From commits-return-109189-archive-asf-public=cust-asf.ponee.io@lucene.apache.org Tue Jul 2 06:51:52 2019 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [207.244.88.153]) by mx-eu-01.ponee.io (Postfix) with SMTP id A765D18060E for ; Tue, 2 Jul 2019 08:51:51 +0200 (CEST) Received: (qmail 95633 invoked by uid 500); 2 Jul 2019 06:51:47 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 95624 invoked by uid 99); 2 Jul 2019 06:51:47 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 02 Jul 2019 06:51:47 +0000 Received: by gitbox.apache.org (ASF Mail Server at gitbox.apache.org, from userid 33) id 2E45387AD7; Tue, 2 Jul 2019 06:51:47 +0000 (UTC) Date: Tue, 02 Jul 2019 06:51:47 +0000 To: "commits@lucene.apache.org" Subject: [lucene-solr] branch master updated: LUCENE-8896: Override default implementation of IntersectVisitor#visit(DocIDSetBuilder, byte[]) for several queries (#756) MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Message-ID: <156205030703.12610.11138843763901295901@gitbox.apache.org> From: ivera@apache.org X-Git-Host: gitbox.apache.org X-Git-Repo: lucene-solr X-Git-Refname: refs/heads/master X-Git-Reftype: branch X-Git-Oldrev: 5e109fb0a76070c0ccb8e36f0c9db2f297a246c5 X-Git-Newrev: 7fc9b4976eec5912f9f10e849cf4f4c3c77b27a4 X-Git-Rev: 7fc9b4976eec5912f9f10e849cf4f4c3c77b27a4 X-Git-NotificationType: ref_changed_plus_diff X-Git-Multimail-Version: 1.5.dev Auto-Submitted: auto-generated This is an automated email from the ASF dual-hosted git repository. ivera pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/lucene-solr.git The following commit(s) were added to refs/heads/master by this push: new 7fc9b49 LUCENE-8896: Override default implementation of IntersectVisitor#visit(DocIDSetBuilder, byte[]) for several queries (#756) 7fc9b49 is described below commit 7fc9b4976eec5912f9f10e849cf4f4c3c77b27a4 Author: Ignacio Vera AuthorDate: Tue Jul 2 08:51:41 2019 +0200 LUCENE-8896: Override default implementation of IntersectVisitor#visit(DocIDSetBuilder, byte[]) for several queries (#756) --- lucene/CHANGES.txt | 3 + .../lucene/document/LatLonPointDistanceQuery.java | 154 ++++++++++----------- .../lucene/document/LatLonPointInPolygonQuery.java | 14 +- .../apache/lucene/document/RangeFieldQuery.java | 16 ++- .../org/apache/lucene/search/PointInSetQuery.java | 35 ++++- .../org/apache/lucene/search/PointRangeQuery.java | 132 +++++++++--------- .../org/apache/lucene/search/TestPointQueries.java | 59 ++++++++ .../apache/lucene/document/LatLonShapeQuery.java | 39 +++++- .../lucene/document/BaseLatLonShapeTestCase.java | 19 +++ .../apache/lucene/geo/BaseGeoPointTestCase.java | 23 +++ .../lucene/search/BaseRangeFieldQueryTestCase.java | 29 ++++ 11 files changed, 369 insertions(+), 154 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index ba631f0..5567f6a 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -165,6 +165,9 @@ Optimizations * LUCENE-8885: Optimise BKD reader by exploiting cardinality information stored on leaves. (Ignacio Vera) +* LUCENE-8896: Override default implementation of IntersectVisitor#visit(DocIDSetBuilder, byte[]) + for several queries. (Ignacio Vera) + Test Framework * LUCENE-8825: CheckHits now display the shard index in case of mismatch diff --git a/lucene/core/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java b/lucene/core/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java index 750124a..5e08b45 100644 --- a/lucene/core/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java @@ -186,6 +186,56 @@ final class LatLonPointDistanceQuery extends Query { } + private boolean matches(byte[] packedValue) { + // bounding box check + if (Arrays.compareUnsigned(packedValue, 0, Integer.BYTES, maxLat, 0, Integer.BYTES) > 0 || + Arrays.compareUnsigned(packedValue, 0, Integer.BYTES, minLat, 0, Integer.BYTES) < 0) { + // latitude out of bounding box range + return false; + } + + if ((Arrays.compareUnsigned(packedValue, Integer.BYTES, Integer.BYTES + Integer.BYTES, maxLon, 0, Integer.BYTES) > 0 || + Arrays.compareUnsigned(packedValue, Integer.BYTES, Integer.BYTES + Integer.BYTES, minLon, 0, Integer.BYTES) < 0) + && Arrays.compareUnsigned(packedValue, Integer.BYTES, Integer.BYTES + Integer.BYTES, minLon2, 0, Integer.BYTES) < 0) { + // longitude out of bounding box range + return false; + } + + int docLatitude = NumericUtils.sortableBytesToInt(packedValue, 0); + int docLongitude = NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES); + if (distancePredicate.test(docLatitude, docLongitude)) { + return true; + } + return false; + } + + // algorithm: we create a bounding box (two bounding boxes if we cross the dateline). + // 1. check our bounding box(es) first. if the subtree is entirely outside of those, bail. + // 2. check if the subtree is disjoint. it may cross the bounding box but not intersect with circle + // 3. see if the subtree is fully contained. if the subtree is enormous along the x axis, wrapping half way around the world, etc: then this can't work, just go to step 4. + // 4. recurse naively (subtrees crossing over circle edge) + private Relation relate(byte[] minPackedValue, byte[] maxPackedValue) { + if (Arrays.compareUnsigned(minPackedValue, 0, Integer.BYTES, maxLat, 0, Integer.BYTES) > 0 || + Arrays.compareUnsigned(maxPackedValue, 0, Integer.BYTES, minLat, 0, Integer.BYTES) < 0) { + // latitude out of bounding box range + return Relation.CELL_OUTSIDE_QUERY; + } + + if ((Arrays.compareUnsigned(minPackedValue, Integer.BYTES, Integer.BYTES + Integer.BYTES, maxLon, 0, Integer.BYTES) > 0 || + Arrays.compareUnsigned(maxPackedValue, Integer.BYTES, Integer.BYTES + Integer.BYTES, minLon, 0, Integer.BYTES) < 0) + && Arrays.compareUnsigned(maxPackedValue, Integer.BYTES, Integer.BYTES + Integer.BYTES, minLon2, 0, Integer.BYTES) < 0) { + // longitude out of bounding box range + return Relation.CELL_OUTSIDE_QUERY; + } + + double latMin = decodeLatitude(minPackedValue, 0); + double lonMin = decodeLongitude(minPackedValue, Integer.BYTES); + double latMax = decodeLatitude(maxPackedValue, 0); + double lonMax = decodeLongitude(maxPackedValue, Integer.BYTES); + + return GeoUtils.relate(latMin, latMax, lonMin, lonMax, latitude, longitude, sortKey, axisLat); + } + /** * Create a visitor that collects documents matching the range. */ @@ -206,53 +256,24 @@ final class LatLonPointDistanceQuery extends Query { @Override public void visit(int docID, byte[] packedValue) { - // bounding box check - if (Arrays.compareUnsigned(packedValue, 0, Integer.BYTES, maxLat, 0, Integer.BYTES) > 0 || - Arrays.compareUnsigned(packedValue, 0, Integer.BYTES, minLat, 0, Integer.BYTES) < 0) { - // latitude out of bounding box range - return; - } - - if ((Arrays.compareUnsigned(packedValue, Integer.BYTES, Integer.BYTES + Integer.BYTES, maxLon, 0, Integer.BYTES) > 0 || - Arrays.compareUnsigned(packedValue, Integer.BYTES, Integer.BYTES + Integer.BYTES, minLon, 0, Integer.BYTES) < 0) - && Arrays.compareUnsigned(packedValue, Integer.BYTES, Integer.BYTES + Integer.BYTES, minLon2, 0, Integer.BYTES) < 0) { - // longitude out of bounding box range - return; + if (matches(packedValue)) { + visit(docID); } + } - int docLatitude = NumericUtils.sortableBytesToInt(packedValue, 0); - int docLongitude = NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES); - if (distancePredicate.test(docLatitude, docLongitude)) { - adder.add(docID); + @Override + public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { + if (matches(packedValue)) { + int docID; + while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + visit(docID); + } } } - // algorithm: we create a bounding box (two bounding boxes if we cross the dateline). - // 1. check our bounding box(es) first. if the subtree is entirely outside of those, bail. - // 2. check if the subtree is disjoint. it may cross the bounding box but not intersect with circle - // 3. see if the subtree is fully contained. if the subtree is enormous along the x axis, wrapping half way around the world, etc: then this can't work, just go to step 4. - // 4. recurse naively (subtrees crossing over circle edge) @Override public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { - if (Arrays.compareUnsigned(minPackedValue, 0, Integer.BYTES, maxLat, 0, Integer.BYTES) > 0 || - Arrays.compareUnsigned(maxPackedValue, 0, Integer.BYTES, minLat, 0, Integer.BYTES) < 0) { - // latitude out of bounding box range - return Relation.CELL_OUTSIDE_QUERY; - } - - if ((Arrays.compareUnsigned(minPackedValue, Integer.BYTES, Integer.BYTES + Integer.BYTES, maxLon, 0, Integer.BYTES) > 0 || - Arrays.compareUnsigned(maxPackedValue, Integer.BYTES, Integer.BYTES + Integer.BYTES, minLon, 0, Integer.BYTES) < 0) - && Arrays.compareUnsigned(maxPackedValue, Integer.BYTES, Integer.BYTES + Integer.BYTES, minLon2, 0, Integer.BYTES) < 0) { - // longitude out of bounding box range - return Relation.CELL_OUTSIDE_QUERY; - } - - double latMin = decodeLatitude(minPackedValue, 0); - double lonMin = decodeLongitude(minPackedValue, Integer.BYTES); - double latMax = decodeLatitude(maxPackedValue, 0); - double lonMax = decodeLongitude(maxPackedValue, Integer.BYTES); - - return GeoUtils.relate(latMin, latMax, lonMin, lonMax, latitude, longitude, sortKey, axisLat); + return relate(minPackedValue, maxPackedValue); } }; } @@ -271,53 +292,24 @@ final class LatLonPointDistanceQuery extends Query { @Override public void visit(int docID, byte[] packedValue) { - // bounding box check - if (Arrays.compareUnsigned(packedValue, 0, Integer.BYTES, maxLat, 0, Integer.BYTES) > 0 || - Arrays.compareUnsigned(packedValue, 0, Integer.BYTES, minLat, 0, Integer.BYTES) < 0) { - // latitude out of bounding box range - result.clear(docID); - cost[0]--; - return; - } - - if ((Arrays.compareUnsigned(packedValue, Integer.BYTES, Integer.BYTES + Integer.BYTES, maxLon, 0, Integer.BYTES) > 0 || - Arrays.compareUnsigned(packedValue, Integer.BYTES, Integer.BYTES + Integer.BYTES, minLon, 0, Integer.BYTES) < 0) - && Arrays.compareUnsigned(packedValue, Integer.BYTES, Integer.BYTES + Integer.BYTES, minLon2, 0, Integer.BYTES) < 0) { - // longitude out of bounding box range - result.clear(docID); - cost[0]--; - return; + if (matches(packedValue) == false) { + visit(docID); } + } - int docLatitude = NumericUtils.sortableBytesToInt(packedValue, 0); - int docLongitude = NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES); - if (!distancePredicate.test(docLatitude, docLongitude)) { - result.clear(docID); - cost[0]--; + @Override + public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { + if (matches(packedValue) == false) { + int docID; + while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + visit(docID); + } } } @Override public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { - if (Arrays.compareUnsigned(minPackedValue, 0, Integer.BYTES, maxLat, 0, Integer.BYTES) > 0 || - Arrays.compareUnsigned(maxPackedValue, 0, Integer.BYTES, minLat, 0, Integer.BYTES) < 0) { - // latitude out of bounding box range - return Relation.CELL_INSIDE_QUERY; - } - - if ((Arrays.compareUnsigned(minPackedValue, Integer.BYTES, Integer.BYTES + Integer.BYTES, maxLon, 0, Integer.BYTES) > 0 || - Arrays.compareUnsigned(maxPackedValue, Integer.BYTES, Integer.BYTES + Integer.BYTES, minLon, 0, Integer.BYTES) < 0) - && Arrays.compareUnsigned(maxPackedValue, Integer.BYTES, Integer.BYTES + Integer.BYTES, minLon2, 0, Integer.BYTES) < 0) { - // latitude out of bounding box range - return Relation.CELL_INSIDE_QUERY; - } - - double latMin = decodeLatitude(minPackedValue, 0); - double lonMin = decodeLongitude(minPackedValue, Integer.BYTES); - double latMax = decodeLatitude(maxPackedValue, 0); - double lonMax = decodeLongitude(maxPackedValue, Integer.BYTES); - - Relation relation = GeoUtils.relate(latMin, latMax, lonMin, lonMax, latitude, longitude, sortKey, axisLat); + Relation relation = relate(minPackedValue, maxPackedValue); switch (relation) { case CELL_INSIDE_QUERY: // all points match, skip this subtree @@ -329,10 +321,8 @@ final class LatLonPointDistanceQuery extends Query { return relation; } } - }; } - }; } diff --git a/lucene/core/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java b/lucene/core/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java index 6907b1d..160ea49 100644 --- a/lucene/core/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java @@ -31,6 +31,7 @@ import org.apache.lucene.index.PointValues.IntersectVisitor; import org.apache.lucene.index.PointValues.Relation; import org.apache.lucene.search.ConstantScoreScorer; import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.QueryVisitor; @@ -142,7 +143,18 @@ final class LatLonPointInPolygonQuery extends Query { public void visit(int docID, byte[] packedValue) { if (polygonPredicate.test(NumericUtils.sortableBytesToInt(packedValue, 0), NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES))) { - adder.add(docID); + visit(docID); + } + } + + @Override + public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { + if (polygonPredicate.test(NumericUtils.sortableBytesToInt(packedValue, 0), + NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES))) { + int docID; + while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + visit(docID); + } } } diff --git a/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java b/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java index 37f30ed..6cba7de 100644 --- a/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java @@ -275,20 +275,34 @@ abstract class RangeFieldQuery extends Query { private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result) { return new IntersectVisitor() { DocIdSetBuilder.BulkAdder adder; + @Override public void grow(int count) { adder = result.grow(count); } + @Override public void visit(int docID) throws IOException { adder.add(docID); } + @Override public void visit(int docID, byte[] leaf) throws IOException { if (queryType.matches(ranges, leaf, numDims, bytesPerDim)) { - adder.add(docID); + visit(docID); + } + } + + @Override + public void visit(DocIdSetIterator iterator, byte[] leaf) throws IOException { + if (queryType.matches(ranges, leaf, numDims, bytesPerDim)) { + int docID; + while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + visit(docID); + } } } + @Override public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { return queryType.compare(ranges, minPackedValue, maxPackedValue, numDims, bytesPerDim); diff --git a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java index be1585b..58bef6c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java @@ -206,13 +206,27 @@ public abstract class PointInSetQuery extends Query implements Accountable { @Override public void visit(int docID, byte[] packedValue) { + if (matches(packedValue)) { + visit(docID); + } + } + + @Override + public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { + if (matches(packedValue)) { + int docID; + while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + visit(docID); + } + } + } + + private boolean matches(byte[] packedValue) { scratch.bytes = packedValue; while (nextQueryPoint != null) { int cmp = nextQueryPoint.compareTo(scratch); if (cmp == 0) { - // Query point equals index point, so collect and return - adder.add(docID); - break; + return true; } else if (cmp < 0) { // Query point is before index point, so we move to next query point nextQueryPoint = iterator.next(); @@ -221,6 +235,7 @@ public abstract class PointInSetQuery extends Query implements Accountable { break; } } + return false; } @Override @@ -287,7 +302,19 @@ public abstract class PointInSetQuery extends Query implements Accountable { assert packedValue.length == pointBytes.length; if (Arrays.equals(packedValue, pointBytes)) { // The point for this doc matches the point we are querying on - adder.add(docID); + visit(docID); + } + } + + @Override + public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { + assert packedValue.length == pointBytes.length; + if (Arrays.equals(packedValue, pointBytes)) { + // The point for this set of docs matches the point we are querying on + int docID; + while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + visit(docID); + } } } diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java index c51b255..b8914c7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java @@ -113,6 +113,44 @@ public abstract class PointRangeQuery extends Query { return new ConstantScoreWeight(this, boost) { + private boolean matches(byte[] packedValue) { + for(int dim=0;dim 0) { + // Doc's value is too high, in this dimension + return false; + } + } + return true; + } + + private Relation relate(byte[] minPackedValue, byte[] maxPackedValue) { + + boolean crosses = false; + + for(int dim=0;dim 0 || + Arrays.compareUnsigned(maxPackedValue, offset, offset + bytesPerDim, lowerPoint, offset, offset + bytesPerDim) < 0) { + return Relation.CELL_OUTSIDE_QUERY; + } + + crosses |= Arrays.compareUnsigned(minPackedValue, offset, offset + bytesPerDim, lowerPoint, offset, offset + bytesPerDim) < 0 || + Arrays.compareUnsigned(maxPackedValue, offset, offset + bytesPerDim, upperPoint, offset, offset + bytesPerDim) > 0; + } + + if (crosses) { + return Relation.CELL_CROSSES_QUERY; + } else { + return Relation.CELL_INSIDE_QUERY; + } + } + private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result) { return new IntersectVisitor() { @@ -130,44 +168,24 @@ public abstract class PointRangeQuery extends Query { @Override public void visit(int docID, byte[] packedValue) { - for(int dim=0;dim 0) { - // Doc's value is too high, in this dimension - return; - } + if (matches(packedValue)) { + visit(docID); } - - // Doc is in-bounds - adder.add(docID); } @Override - public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { - - boolean crosses = false; - - for(int dim=0;dim 0 || - Arrays.compareUnsigned(maxPackedValue, offset, offset + bytesPerDim, lowerPoint, offset, offset + bytesPerDim) < 0) { - return Relation.CELL_OUTSIDE_QUERY; + public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { + if (matches(packedValue)) { + int docID; + while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + visit(docID); } - - crosses |= Arrays.compareUnsigned(minPackedValue, offset, offset + bytesPerDim, lowerPoint, offset, offset + bytesPerDim) < 0 || - Arrays.compareUnsigned(maxPackedValue, offset, offset + bytesPerDim, upperPoint, offset, offset + bytesPerDim) > 0; } + } - if (crosses) { - return Relation.CELL_CROSSES_QUERY; - } else { - return Relation.CELL_INSIDE_QUERY; - } + @Override + public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + return relate(minPackedValue, maxPackedValue); } }; } @@ -186,45 +204,33 @@ public abstract class PointRangeQuery extends Query { @Override public void visit(int docID, byte[] packedValue) { - for(int dim=0;dim 0) { - // Doc's value is too high, in this dimension - result.clear(docID); - cost[0]--; - return; - } + if (matches(packedValue) == false) { + visit(docID); } } @Override - public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { - - boolean crosses = false; - - for(int dim=0;dim 0 || - Arrays.compareUnsigned(maxPackedValue, offset, offset + bytesPerDim, lowerPoint, offset, offset + bytesPerDim) < 0) { - // This dim is not in the range - return Relation.CELL_INSIDE_QUERY; + public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { + if (matches(packedValue) == false) { + int docID; + while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + visit(docID); } - - crosses |= Arrays.compareUnsigned(minPackedValue, offset, offset + bytesPerDim, lowerPoint, offset, offset + bytesPerDim) < 0 || - Arrays.compareUnsigned(maxPackedValue, offset, offset + bytesPerDim, upperPoint, offset, offset + bytesPerDim) > 0; } + } - if (crosses) { - return Relation.CELL_CROSSES_QUERY; - } else { - return Relation.CELL_OUTSIDE_QUERY; + @Override + public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + Relation relation = relate(minPackedValue, maxPackedValue); + switch (relation) { + case CELL_INSIDE_QUERY: + // all points match, skip this subtree + return Relation.CELL_OUTSIDE_QUERY; + case CELL_OUTSIDE_QUERY: + // none of the points match, clear all documents + return Relation.CELL_INSIDE_QUERY; + default: + return relation; } } }; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java index 2bc6b2c..49c2907 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java @@ -1761,6 +1761,65 @@ public class TestPointQueries extends LuceneTestCase { dir.close(); } + public void testPointRangeQueryManyEqualValues() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setCodec(getCodec()); + IndexWriter w = new IndexWriter(dir, iwc); + + int cardinality = TestUtil.nextInt(random(), 2, 20); + + int zeroCount = 0; + int oneCount = 0; + for(int i=0;i<10000;i++) { + int x = random().nextInt(cardinality); + if (x == 0) { + zeroCount++; + } else if (x == 1) { + oneCount++; + } + Document doc = new Document(); + doc.add(new IntPoint("int", x)); + doc.add(new LongPoint("long", (long) x)); + doc.add(new FloatPoint("float", (float) x)); + doc.add(new DoublePoint("double", (double) x)); + doc.add(new BinaryPoint("bytes", new byte[] {(byte) x})); + w.addDocument(doc); + } + + IndexReader r = DirectoryReader.open(w); + IndexSearcher s = newSearcher(r, false); + + assertEquals(zeroCount, s.count(IntPoint.newRangeQuery("int", 0, 0))); + assertEquals(oneCount, s.count(IntPoint.newRangeQuery("int", 1, 1))); + assertEquals(zeroCount + oneCount, s.count(IntPoint.newRangeQuery("int", 0, 1))); + assertEquals(10000 - zeroCount - oneCount, s.count(IntPoint.newRangeQuery("int", 2, cardinality))); + + assertEquals(zeroCount, s.count(LongPoint.newRangeQuery("long", 0, 0))); + assertEquals(oneCount, s.count(LongPoint.newRangeQuery("long", 1, 1))); + assertEquals(zeroCount + oneCount, s.count(LongPoint.newRangeQuery("long", 0, 1))); + assertEquals(10000 - zeroCount - oneCount, s.count(LongPoint.newRangeQuery("long", 2, cardinality))); + + assertEquals(zeroCount, s.count(FloatPoint.newRangeQuery("float", 0, 0))); + assertEquals(oneCount, s.count(FloatPoint.newRangeQuery("float", 1, 1))); + assertEquals(zeroCount + oneCount, s.count(FloatPoint.newRangeQuery("float", 0, 1))); + assertEquals(10000 - zeroCount - oneCount, s.count(FloatPoint.newRangeQuery("float", 2, cardinality))); + + assertEquals(zeroCount, s.count(DoublePoint.newRangeQuery("double", 0, 0))); + assertEquals(oneCount, s.count(DoublePoint.newRangeQuery("double", 1, 1))); + assertEquals(zeroCount + oneCount, s.count(DoublePoint.newRangeQuery("double", 0, 1))); + assertEquals(10000 - zeroCount - oneCount, s.count(DoublePoint.newRangeQuery("double", 2, cardinality))); + + assertEquals(zeroCount, s.count(BinaryPoint.newRangeQuery("bytes", new byte[] {0}, new byte[] {0}))); + assertEquals(oneCount, s.count(BinaryPoint.newRangeQuery("bytes", new byte[] {1}, new byte[] {1}))); + assertEquals(zeroCount + oneCount, s.count(BinaryPoint.newRangeQuery("bytes", new byte[] {0}, new byte[] {1}))); + assertEquals(10000 - zeroCount - oneCount, s.count(BinaryPoint.newRangeQuery("bytes", new byte[] {2}, new byte[] {(byte) cardinality}))); + + w.close(); + r.close(); + dir.close(); + } + public void testPointInSetQueryManyEqualValuesWithBigGap() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(); diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonShapeQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonShapeQuery.java index 124a7a7..691edcf 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonShapeQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonShapeQuery.java @@ -117,7 +117,17 @@ abstract class LatLonShapeQuery extends Query { @Override public void visit(int docID, byte[] t) throws IOException { if (queryMatches(t, scratchTriangle, QueryRelation.INTERSECTS)) { - adder.add(docID); + visit(docID); + } + } + + @Override + public void visit(DocIdSetIterator iterator, byte[] t) throws IOException { + if (queryMatches(t, scratchTriangle, QueryRelation.INTERSECTS)) { + int docID; + while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + visit(docID); + } } } @@ -153,6 +163,19 @@ abstract class LatLonShapeQuery extends Query { } @Override + public void visit(DocIdSetIterator iterator, byte[] t) throws IOException { + boolean queryMatches = queryMatches(t, scratchTriangle, queryRelation); + int docID; + while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + if (queryMatches) { + intersect.set(docID); + } else { + disjoint.set(docID); + } + } + } + + @Override public Relation compare(byte[] minTriangle, byte[] maxTriangle) { return relateRangeToQuery(minTriangle, maxTriangle, queryRelation); } @@ -308,12 +331,22 @@ abstract class LatLonShapeQuery extends Query { @Override public void visit(int docID, byte[] packedTriangle) { if (query.queryMatches(packedTriangle, scratchTriangle, QueryRelation.INTERSECTS) == false) { - result.clear(docID); - cost[0]--; + visit(docID); } } @Override + public void visit(DocIdSetIterator iterator, byte[] t) throws IOException { + if (query.queryMatches(t, scratchTriangle, QueryRelation.INTERSECTS) == false) { + int docID; + while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + visit(docID); + } + } + } + + + @Override public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { return transposeRelation(query.relateRangeToQuery(minPackedValue, maxPackedValue, QueryRelation.INTERSECTS)); } diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/BaseLatLonShapeTestCase.java b/lucene/sandbox/src/test/org/apache/lucene/document/BaseLatLonShapeTestCase.java index ec60d61..fc7e290 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/document/BaseLatLonShapeTestCase.java +++ b/lucene/sandbox/src/test/org/apache/lucene/document/BaseLatLonShapeTestCase.java @@ -49,6 +49,7 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; import static com.carrotsearch.randomizedtesting.RandomizedTest.randomBoolean; import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween; @@ -253,6 +254,24 @@ public abstract class BaseLatLonShapeTestCase extends LuceneTestCase { verify(shapes); } + // Force low cardinality leaves + public void testLowCardinalityShapeManyTimes() throws Exception { + int numShapes = atLeast(500); + int cardinality = TestUtil.nextInt(random(), 2, 20); + + Object[] diffShapes = new Object[cardinality]; + for (int i = 0; i < cardinality; i++) { + diffShapes[i] = nextShape(); + } + + Object[] shapes = new Object[numShapes]; + for (int i = 0; i < numShapes; i++) { + shapes[i] = diffShapes[random().nextInt(cardinality)]; + } + + verify(shapes); + } + public void testRandomTiny() throws Exception { // Make sure single-leaf-node case is OK: doTestRandom(10); diff --git a/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java index 066716a..134e0be 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java @@ -414,6 +414,29 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase { verify(lats, lons); } + // A particularly tricky adversary for BKD tree: + public void testLowCardinality() throws Exception { + int numPoints = atLeast(1000); + int cardinality = TestUtil.nextInt(random(), 2, 20); + + double[] diffLons = new double[cardinality]; + double[] diffLats = new double[cardinality]; + for (int i = 0; i< cardinality; i++) { + diffLats[i] = nextLatitude(); + diffLons[i] = nextLongitude(); + } + + double[] lats = new double[numPoints]; + double[] lons = new double[numPoints]; + for (int i = 0; i < numPoints; i++) { + int index = random().nextInt(cardinality); + lats[i] = diffLats[index]; + lons[i] = diffLons[index]; + } + + verify(lats, lons); + } + public void testAllLatEqual() throws Exception { int numPoints = atLeast(10000); double lat = nextLatitude(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/BaseRangeFieldQueryTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/search/BaseRangeFieldQueryTestCase.java index 09a8e89..5015f36 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/BaseRangeFieldQueryTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/BaseRangeFieldQueryTestCase.java @@ -17,6 +17,7 @@ package org.apache.lucene.search; import java.io.IOException; +import java.util.Arrays; import java.util.HashSet; import java.util.Set; @@ -38,6 +39,7 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; /** * Abstract class to do basic tests for a RangeField query. Testing rigor inspired by {@code BaseGeoPointTestCase} @@ -79,6 +81,33 @@ public abstract class BaseRangeFieldQueryTestCase extends LuceneTestCase { doTestRandom(10000, true); } + public void testAllEqual() throws Exception { + int numDocs = atLeast(10000); + int dimensions = dimension(); + Range[][] ranges = new Range[numDocs][]; + Range[] theRange = new Range[] {nextRange(dimensions)}; + Arrays.fill(ranges, theRange); + verify(ranges); + } + + // Force low cardinality leaves + public void testLowCardinality() throws Exception { + int numDocs = atLeast(10000); + int dimensions = dimension(); + + int cardinality = TestUtil.nextInt(random(), 2, 20); + Range[][] diffRanges = new Range[cardinality][]; + for (int i = 0; i < cardinality; i++) { + diffRanges[i] = new Range[] {nextRange(dimensions)}; + } + + Range[][] ranges = new Range[numDocs][]; + for (int i = 0; i < numDocs; i++) { + ranges[i] = diffRanges[random().nextInt(cardinality)]; + } + verify(ranges); + } + private void doTestRandom(int count, boolean multiValued) throws Exception { int numDocs = atLeast(count); int dimensions = dimension();