lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From markrmil...@apache.org
Subject svn commit: r829129 - in /lucene/java/trunk/contrib/highlighter/src: java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java test/org/apache/lucene/search/highlight/HighlighterTest.java
Date Fri, 23 Oct 2009 17:08:50 GMT
Author: markrmiller
Date: Fri Oct 23 17:08:50 2009
New Revision: 829129

URL: http://svn.apache.org/viewvc?rev=829129&view=rev
Log:
LUCENE-2003: Highlighter doesn't respect position increments other than 1 with PhraseQuerys

Modified:
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
    lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java

Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java?rev=829129&r1=829128&r2=829129&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
(original)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
Fri Oct 23 17:08:50 2009
@@ -95,13 +95,32 @@
         }
       }
     } else if (query instanceof PhraseQuery) {
-      Term[] phraseQueryTerms = ((PhraseQuery) query).getTerms();
+      PhraseQuery phraseQuery = ((PhraseQuery) query);
+      Term[] phraseQueryTerms = phraseQuery.getTerms();
       SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
       for (int i = 0; i < phraseQueryTerms.length; i++) {
         clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
       }
+      int slop = phraseQuery.getSlop();
+      int[] positions = phraseQuery.getPositions();
+      // add largest position increment to slop
+      if (positions.length > 0) {
+        int lastPos = positions[0];
+        int largestInc = 0;
+        int sz = positions.length;
+        for (int i = 1; i < sz; i++) {
+          int pos = positions[i];
+          int inc = pos - lastPos;
+          if (inc > largestInc) {
+            largestInc = inc;
+          }
+          lastPos = pos;
+        }
+        if(largestInc > 1) {
+          slop += largestInc;
+        }
+      }
 
-      int slop = ((PhraseQuery) query).getSlop();
       boolean inorder = false;
 
       if (slop == 0) {

Modified: lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=829129&r1=829128&r2=829129&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
(original)
+++ lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
Fri Oct 23 17:08:50 2009
@@ -84,7 +84,7 @@
  */
 public class HighlighterTest extends BaseTokenStreamTestCase implements Formatter {
   // TODO: change to CURRENT, does not work because posIncr:
-  static final Version TEST_VERSION = Version.LUCENE_24;
+  static final Version TEST_VERSION = Version.LUCENE_29;
 
   private IndexReader reader;
   static final String FIELD_NAME = "contents";
@@ -100,7 +100,7 @@
       "This piece of text refers to Kennedy at the beginning then has a longer piece of text
that is very long in the middle and finally ends with another reference to Kennedy",
       "JFK has been shot", "John Kennedy has been shot",
       "This text has a typo in referring to Keneddy",
-      "wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", "y z x y z a b" };
+      "wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", "y z x y z a b", "lets
is a the lets is a the lets is a the lets" };
 
   /**
    * Constructor for HighlightExtractorTest.
@@ -256,6 +256,51 @@
 
     assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
         numHighlights == 3);
+    
+    numHighlights = 0;
+    doSearching("\"This piece of text refers to Kennedy\"");
+
+    maxNumFragmentsRequired = 2;
+
+    scorer = new QueryScorer(query, FIELD_NAME);
+    highlighter = new Highlighter(this, scorer);
+    
+    for (int i = 0; i < hits.totalHits; i++) {
+      String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
+
+      highlighter.setTextFragmenter(new SimpleFragmenter(40));
+
+      String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
+          "...");
+      System.out.println("\t" + result);
+    }
+
+    assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+        numHighlights == 4);
+    
+    numHighlights = 0;
+    doSearching("\"lets is a the lets is a the lets is a the lets\"");
+
+    maxNumFragmentsRequired = 2;
+
+    scorer = new QueryScorer(query, FIELD_NAME);
+    highlighter = new Highlighter(this, scorer);
+    
+    for (int i = 0; i < hits.totalHits; i++) {
+      String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
+
+      highlighter.setTextFragmenter(new SimpleFragmenter(40));
+
+      String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
+          "...");
+      System.out.println("\t" + result);
+    }
+
+    assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+        numHighlights == 4);
+    
   }
 
   public void testSimpleQueryScorerPhraseHighlighting2() throws Exception {
@@ -1531,6 +1576,7 @@
 
   public void doSearching(String queryString) throws Exception {
     QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
+    parser.setEnablePositionIncrements(true);
     parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
     query = parser.parse(queryString);
     doSearching(query);



Mime
View raw message