lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r1458009 - in /lucene/dev/trunk/lucene: CHANGES.txt highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
Date Mon, 18 Mar 2013 21:20:11 GMT
Author: mikemccand
Date: Mon Mar 18 21:20:11 2013
New Revision: 1458009

URL: http://svn.apache.org/r1458009
Log:
LUCENE-4846: PostingsHighlighter allow customizing how the values to be highlighted are loaded
(default is still stored fields)

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
    lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1458009&r1=1458008&r2=1458009&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Mon Mar 18 21:20:11 2013
@@ -102,6 +102,10 @@ New Features
 * LUCENE-4832: Add ToParentBlockJoinCollector.getTopGroupsWithAllChildDocs, to retrieve
   all children in each group.  (Aleksey Aleev via Mike McCandless)
 
+* LUCENE-4846: PostingsHighlighter subclasses can override where the
+  String values come from (it still defaults to pulling from stored
+  fields).  (Robert Muir, Mike McCandless)
+
 API Changes
 
 * LUCENE-4844: removed TaxonomyReader.getParent(), you should use

Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java?rev=1458009&r1=1458008&r2=1458009&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java Mon Mar 18 21:20:11 2013
@@ -81,7 +81,7 @@ import org.apache.lucene.util.UnicodeUti
  * This is thread-safe, and can be used across different readers.
  * @lucene.experimental
  */
-public final class PostingsHighlighter {
+public class PostingsHighlighter {
   
   // TODO: maybe allow re-analysis for tiny fields? currently we require offsets,
   // but if the analyzer is really fast and the field is tiny, this might really be
@@ -257,15 +257,7 @@ public final class PostingsHighlighter {
     Arrays.sort(fields);
     
     // pull stored data:
-    LimitedStoredFieldVisitor visitor = new LimitedStoredFieldVisitor(fields, maxLength);
-    String contents[][] = new String[fields.length][docids.length];
-    for (int i = 0; i < docids.length; i++) {
-      searcher.doc(docids[i], visitor);
-      for (int j = 0; j < fields.length; j++) {
-        contents[j][i] = visitor.getValue(j).toString();
-      }
-      visitor.reset();
-    }
+    String[][] contents = loadFieldValues(searcher, fields, docids, maxLength);
     
     Map<String,String[]> highlights = new HashMap<String,String[]>();
     for (int i = 0; i < fields.length; i++) {
@@ -285,6 +277,25 @@ public final class PostingsHighlighter {
     }
     return highlights;
   }
+
+  /** Loads the String values for each field X docID to be
+   *  highlighted.  By default this loads from stored
+   *  fields, but a subclass can change the source.  This
+   *  method should allocate the String[fields.length][docids.length]
+   *  and fill all values.  The returned Strings must be
+   *  identical to what was indexed. */
+  protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
+    String contents[][] = new String[fields.length][docids.length];
+    LimitedStoredFieldVisitor visitor = new LimitedStoredFieldVisitor(fields, maxLength);
+    for (int i = 0; i < docids.length; i++) {
+      searcher.doc(docids[i], visitor);
+      for (int j = 0; j < fields.length; j++) {
+        contents[j][i] = visitor.getValue(j).toString();
+      }
+      visitor.reset();
+    }
+    return contents;
+  }
     
   private Map<Integer,String> highlightField(String field, String contents[], BreakIterator bi, Term terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages) throws IOException {  
     Map<Integer,String> highlights = new HashMap<Integer,String>();

Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java?rev=1458009&r1=1458008&r2=1458009&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java Mon Mar 18 21:20:11 2013
@@ -18,6 +18,7 @@ package org.apache.lucene.search.posting
  */
 
 import java.io.BufferedReader;
+import java.io.IOException;
 import java.io.InputStreamReader;
 import java.util.Map;
 
@@ -465,4 +466,46 @@ public class TestPostingsHighlighter ext
     ir.close();
     dir.close();
   }
+
+  public void testCustomFieldValueSource() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    
+    Document doc = new Document();
+
+    FieldType offsetsType = new FieldType(TextField.TYPE_NOT_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    final String text = "This is a test.  Just highlighting from postings. This is also a much sillier test.  Feel free to test test test test test test test.";
+    Field body = new Field("body", text, offsetsType);
+    doc.add(body);
+    iw.addDocument(doc);
+    
+    IndexReader ir = iw.getReader();
+    iw.close();
+    
+    IndexSearcher searcher = newSearcher(ir);
+
+    PostingsHighlighter highlighter = new PostingsHighlighter(10000, null, new PassageScorer(), new PassageFormatter()) {
+        @Override
+        protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
+          assert fields.length == 1;
+          assert docids.length == 1;
+          String[][] contents = new String[1][1];
+          contents[0][0] = text;
+          return contents;
+        }
+      };
+
+    Query query = new TermQuery(new Term("body", "test"));
+    TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
+    assertEquals(1, topDocs.totalHits);
+    String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
+    assertEquals(1, snippets.length);
+    assertEquals("This is a <b>test</b>.  Just highlighting from postings. This is also a much sillier <b>test</b>.  Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
+    
+    ir.close();
+    dir.close();
+  }
 }



Mime
View raw message