lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cpoersc...@apache.org
Subject [49/50] lucene-solr:jira/solr-9045: LUCENE-7231: WeightedSpanTermExtractor correctly deals with single-term PhraseQuery
Date Thu, 19 May 2016 09:35:49 GMT
LUCENE-7231: WeightedSpanTermExtractor correctly deals with single-term PhraseQuery


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/7793c06a
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/7793c06a
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/7793c06a

Branch: refs/heads/jira/solr-9045
Commit: 7793c06a30eb25ee08ee11a57ca696d3da4744b5
Parents: cfc13f5
Author: Alan Woodward <romseygeek@apache.org>
Authored: Tue May 17 14:08:45 2016 +0100
Committer: Alan Woodward <romseygeek@apache.org>
Committed: Thu May 19 09:48:43 2016 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  3 ++
 .../highlight/WeightedSpanTermExtractor.java    | 33 ++++++++++--------
 .../search/highlight/HighlighterTest.java       | 35 +++++++++++++++++---
 3 files changed, 53 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7793c06a/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 1302684..5eabdb4 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -121,6 +121,9 @@ Bug Fixes
 * LUCENE-7284: GapSpans needs to implement positionsCost(). (Daniel Bigham, Alan
   Woodward)
 
+* LUCENE-7231: WeightedSpanTermExtractor didn't deal correctly with single-term
+  phrase queries. (Eva Popenda, Alan Woodward)
+
 Documentation
 
 * LUCENE-7223: Improve XXXPoint javadocs to make it clear that you

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7793c06a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
index 16b1d7b..89cbd11 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
@@ -115,24 +115,29 @@ public class WeightedSpanTermExtractor {
     } else if (query instanceof PhraseQuery) {
       PhraseQuery phraseQuery = ((PhraseQuery) query);
       Term[] phraseQueryTerms = phraseQuery.getTerms();
-      SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
-      for (int i = 0; i < phraseQueryTerms.length; i++) {
-        clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
+      if (phraseQueryTerms.length == 1) {
+        extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
       }
+      else {
+        SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
+        for (int i = 0; i < phraseQueryTerms.length; i++) {
+          clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
+        }
 
-      // sum position increments beyond 1
-      int positionGaps = 0;
-      int[] positions = phraseQuery.getPositions();
-      if (positions.length >= 2) {
-        // positions are in increasing order.   max(0,...) is just a safeguard.
-        positionGaps = Math.max(0, positions[positions.length-1] - positions[0] - positions.length
+ 1);
-      }
+        // sum position increments beyond 1
+        int positionGaps = 0;
+        int[] positions = phraseQuery.getPositions();
+        if (positions.length >= 2) {
+          // positions are in increasing order.   max(0,...) is just a safeguard.
+          positionGaps = Math.max(0, positions[positions.length - 1] - positions[0] - positions.length
+ 1);
+        }
 
-      //if original slop is 0 then require inOrder
-      boolean inorder = (phraseQuery.getSlop() == 0);
+        //if original slop is 0 then require inOrder
+        boolean inorder = (phraseQuery.getSlop() == 0);
 
-      SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps,
inorder);
-      extractWeightedSpanTerms(terms, sp, boost);
+        SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps,
inorder);
+        extractWeightedSpanTerms(terms, sp, boost);
+      }
     } else if (query instanceof TermQuery) {
       extractWeightedTerms(terms, query, boost);
     } else if (query instanceof SpanQuery) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7793c06a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
index 936d121..0a034f1 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
@@ -16,6 +16,8 @@
  */
 package org.apache.lucene.search.highlight;
 
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
@@ -28,9 +30,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.StringTokenizer;
 
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CachingTokenFilter;
@@ -41,13 +40,14 @@ import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.ngram.NGramTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.document.IntPoint;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.IntPoint;
 import org.apache.lucene.document.StoredField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DirectoryReader;
@@ -93,6 +93,7 @@ import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.automaton.Automata;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.apache.lucene.util.automaton.RegExp;
+import org.junit.Test;
 import org.w3c.dom.Element;
 import org.w3c.dom.NodeList;
 
@@ -1560,6 +1561,32 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements
Formatte
     helper.start();
   }
 
+  @Test
+  public void testHighlighterWithPhraseQuery() throws IOException, InvalidTokenOffsetsException
{
+
+    final Analyzer analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        return new TokenStreamComponents(new NGramTokenizer(4, 4));
+      }
+    };
+    final String fieldName = "substring";
+
+    final List<BytesRef> list = new ArrayList<>();
+    list.add(new BytesRef("uchu"));
+    final PhraseQuery query = new PhraseQuery(fieldName, list.toArray(new BytesRef[list.size()]));
+
+    final QueryScorer fragmentScorer = new QueryScorer(query, fieldName);
+    final SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
+
+    final Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
+    highlighter.setTextFragmenter(new SimpleFragmenter(100));
+    final String fragment = highlighter.getBestFragment(analyzer, fieldName, "Buchung");
+
+    assertEquals("B<b>uchu</b>ng",fragment);
+
+  }
+
   public void testUnRewrittenQuery() throws Exception {
     final TestHighlightRunner helper = new TestHighlightRunner() {
 


Mime
View raw message