lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dsmi...@apache.org
Subject [lucene-solr] branch master updated: SOLR-13367: Range queries will now highlight in hl.method=unified mode. Lucene MatchesUtils.disjunction method for disjunction over BytesRefIterator terms.
Date Tue, 25 Jun 2019 04:10:22 GMT
This is an automated email from the ASF dual-hosted git repository.

dsmiley pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new 85ec39d  SOLR-13367: Range queries will now highlight in hl.method=unified mode.
Lucene MatchesUtils.disjunction method for disjunction over  BytesRefIterator terms.
85ec39d is described below

commit 85ec39d931c742b68cc1873da2fe17800eefda23
Author: David Smiley <dsmiley@apache.org>
AuthorDate: Tue Jun 25 00:10:08 2019 -0400

    SOLR-13367: Range queries will now highlight in hl.method=unified mode.
    Lucene MatchesUtils.disjunction method for disjunction over
     BytesRefIterator terms.
---
 .../src/java/org/apache/lucene/search/MatchesUtils.java   | 11 +++++++++++
 solr/CHANGES.txt                                          |  2 ++
 .../src/java/org/apache/solr/query/SolrRangeQuery.java    | 15 +++++++++++++++
 .../apache/solr/highlight/TestUnifiedSolrHighlighter.java |  5 +++++
 solr/solr-ref-guide/src/highlighting.adoc                 | 11 +++++++----
 5 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchesUtils.java b/lucene/core/src/java/org/apache/lucene/search/MatchesUtils.java
index a8438ae..bf460b7 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MatchesUtils.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MatchesUtils.java
@@ -26,6 +26,8 @@ import java.util.Objects;
 import java.util.stream.Collectors;
 import java.util.stream.StreamSupport;
 
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.util.BytesRefIterator;
 import org.apache.lucene.util.IOSupplier;
 
 /**
@@ -129,4 +131,13 @@ public final class MatchesUtils {
   public static MatchesIterator disjunction(List<MatchesIterator> subMatches) throws
IOException {
     return DisjunctionMatchesIterator.fromSubIterators(subMatches);
   }
+
+  /**
+   * Create a MatchesIterator that is a disjunction over a list of terms extracted from a
{@link BytesRefIterator}.
+   *
+   * Only terms that have at least one match in the given document will be included
+   */
+  public static MatchesIterator disjunction(LeafReaderContext context, int doc, Query query,
String field, BytesRefIterator terms) throws IOException {
+    return DisjunctionMatchesIterator.fromTermsEnum(context, doc, query, field, terms);
+  }
 }
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 992c218..14bdc71 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -144,6 +144,8 @@ New Features
 
 * SOLR-10291: Add matches Stream Evaluator to support regex matching (Joel Bernstein)
 
+* SOLR-13367: Highlighting: Range queries will now highlight in hl.method=unified mode. (David
Smiley)
+
 Bug Fixes
 ----------------------
 
diff --git a/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java b/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java
index 40a68e5..49e44d9 100644
--- a/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java
+++ b/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java
@@ -39,6 +39,8 @@ import org.apache.lucene.search.ConstantScoreWeight;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Matches;
+import org.apache.lucene.search.MatchesUtils;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.QueryVisitor;
 import org.apache.lucene.search.ScoreMode;
@@ -347,6 +349,19 @@ public final class SolrRangeQuery extends ExtendedQueryBase implements
DocSetPro
       this.scoreMode = scoreMode;
     }
 
+    // See MultiTermQueryConstantScoreWrapper matches()
+    @Override
+    public Matches matches(LeafReaderContext context, int doc) throws IOException {
+      SolrRangeQuery query = SolrRangeQuery.this;
+      final Terms terms = context.reader().terms(query.field);
+      if (terms == null) {
+        return null;
+      }
+      if (terms.hasPositions() == false) {
+        return super.matches(context, doc);
+      }
+      return MatchesUtils.forField(query.field, () -> MatchesUtils.disjunction(context,
doc, query, query.field, query.getTermsEnum(context)));
+    }
 
     /** Try to collect terms from the given terms enum and return count=sum(df) for terms
visited so far
      *  or (-count - 1) if this should be rewritten into a boolean query.
diff --git a/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java
b/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java
index f03376d..a009e1f 100644
--- a/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java
+++ b/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java
@@ -282,6 +282,11 @@ public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 {
         "//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em>&#32;one&#32;has&#32;a&#32;first&#32;&lt;i&gt;sentence&lt;&#x2F;i&gt;&#46;'");
   }
 
+  public void testRangeQuery() {
+    assertQ(req("q", "id:101", "hl", "true", "hl.q", "text:[dob TO doe]"),
+        "count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=1");
+  }
+
   public void testRequireFieldMatch() {
     // We highlight on field text3 (hl.fl), but our query only references the "text" field.
Nonetheless, the query word
     //  "document" is found in all fields here.
diff --git a/solr/solr-ref-guide/src/highlighting.adoc b/solr/solr-ref-guide/src/highlighting.adoc
index c792a91..7f9eef9 100644
--- a/solr/solr-ref-guide/src/highlighting.adoc
+++ b/solr/solr-ref-guide/src/highlighting.adoc
@@ -156,13 +156,16 @@ We recommend that you try this highlighter even though it isn't the
default (yet
 +
 The UH highlights a query very _accurately_ and thus is true to what the underlying Lucene
query actually matches.
 Other highlighters highlight terms more liberally (over-highlight).
+For esoteric/custom queries, this highlighter has a greater likelihood of supporting it than
the others.
++
 A strong benefit to this highlighter is that you can opt to configure Solr to put more information
in the underlying index to speed up highlighting of large documents; multiple configurations
are supported, even on a per-field basis.
 There is little or no such flexibility of offset sources for the other highlighters.
 More on this below.
 +
-There are some reasons not to choose this highlighter: The `surround` query parser doesn't
yet work here -- SOLR-12895.
+There are some reasons not to choose this highlighter:
 Passage scoring does not consider boosts in the query.
-Some people want more/better passage breaking flexibility.
+Some users want more/better passage breaking flexibility.
+The "alternate" fallback options are more primitive.
 
 <<The Original Highlighter,Original Highlighter>>:: (`hl.method=original`, the
default)
 +
@@ -181,8 +184,8 @@ The FastVector Highlighter _requires_ full term vector options (`termVectors`,
`
 This highlighter notably supports multi-colored highlighting such that different query words
can be denoted in the fragment with different marking, usually expressed as an HTML tag with
a unique color.
 +
 This highlighter's query-representation is less advanced than the Original or Unified Highlighters:
for example it will not work well with the `surround` parser, and there are multiple reported
bugs pertaining to queries with stop-words.
-+
-Note that both the FastVector and Original Highlighters can be used in conjunction in a search
request to highlight some fields with one and some the other. In contrast, the other highlighters
can only be chosen exclusively.
+
+Both the FastVector and Original Highlighters can be used in conjunction in a search request
to highlight some fields with one and some the other. In contrast, the Unified Highlighter
can only be chosen exclusively.
 
 
 The Unified Highlighter is exclusively configured via search parameters. In contrast, some
settings for the Original and FastVector Highlighters are set in `solrconfig.xml`. There's
a robust example of the latter in the "```techproducts```" configset.


Mime
View raw message