lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r1569852 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/highlighter/ lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
Date Wed, 19 Feb 2014 17:44:57 GMT
Author: rmuir
Date: Wed Feb 19 17:44:56 2014
New Revision: 1569852

URL: http://svn.apache.org/r1569852
Log:
LUCENE-5453: some trivial refactoring to postingshighlighter

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/highlighter/   (props changed)
    lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java

Modified: lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java?rev=1569852&r1=1569851&r2=1569852&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
Wed Feb 19 17:44:56 2014
@@ -456,31 +456,31 @@ public class PostingsHighlighter {
   private Map<Integer,Object> highlightField(String field, String contents[], BreakIterator
bi, BytesRef terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages,
Query query) throws IOException {  
     Map<Integer,Object> highlights = new HashMap<Integer,Object>();
     
-    // reuse in the real sense... for docs in same segment we just advance our old enum
-    DocsAndPositionsEnum postings[] = null;
-    TermsEnum termsEnum = null;
-    int lastLeaf = -1;
-
     PassageFormatter fieldFormatter = getFormatter(field);
     if (fieldFormatter == null) {
       throw new NullPointerException("PassageFormatter cannot be null");
     }
     
-    // check if we should do any multitermprocessing
+    // check if we should do any multiterm processing
     Analyzer analyzer = getIndexAnalyzer(field);
     CharacterRunAutomaton automata[] = new CharacterRunAutomaton[0];
     if (analyzer != null) {
       automata = MultiTermHighlighting.extractAutomata(query, field);
     }
     
-    final BytesRef allTerms[];
+    // resize 'terms', where the last term is the multiterm matcher
     if (automata.length > 0) {
-      allTerms = new BytesRef[terms.length + 1];
-      System.arraycopy(terms, 0, allTerms, 0, terms.length);
-    } else {
-      allTerms = terms;
+      BytesRef newTerms[] = new BytesRef[terms.length + 1];
+      System.arraycopy(terms, 0, newTerms, 0, terms.length);
+      terms = newTerms;
     }
 
+    // we are processing in increasing docid order, so we only need to reinitialize stuff
on segment changes
+    // otherwise, we will just advance() existing enums to the new document in the same segment.
+    DocsAndPositionsEnum postings[] = null;
+    TermsEnum termsEnum = null;
+    int lastLeaf = -1;
+    
     for (int i = 0; i < docids.length; i++) {
       String content = contents[i];
       if (content.length() == 0) {
@@ -491,28 +491,39 @@ public class PostingsHighlighter {
       int leaf = ReaderUtil.subIndex(doc, leaves);
       AtomicReaderContext subContext = leaves.get(leaf);
       AtomicReader r = subContext.reader();
-      Terms t = r.terms(field);
-      if (t == null) {
-        continue; // nothing to do
-      }
+      
+      assert leaf >= lastLeaf; // increasing order
+      
+      // if the segment has changed, we must initialize new enums.
       if (leaf != lastLeaf) {
-        termsEnum = t.iterator(null);
-        postings = new DocsAndPositionsEnum[allTerms.length];
+        Terms t = r.terms(field);
+        if (t != null) {
+          termsEnum = t.iterator(null);
+          postings = new DocsAndPositionsEnum[terms.length];
+        }
+      }
+      if (termsEnum == null) {
+        continue; // no terms for this field, nothing to do
       }
+      
+      // if there are multi-term matches, we have to initialize the "fake" enum for each
document
       if (automata.length > 0) {
         DocsAndPositionsEnum dp = MultiTermHighlighting.getDocsEnum(analyzer.tokenStream(field,
content), automata);
         dp.advance(doc - subContext.docBase);
-        postings[terms.length] = dp;
+        postings[terms.length-1] = dp; // last term is the multiterm matcher
       }
-      Passage passages[] = highlightDoc(field, allTerms, content.length(), bi, doc - subContext.docBase,
termsEnum, postings, maxPassages);
+      
+      Passage passages[] = highlightDoc(field, terms, content.length(), bi, doc - subContext.docBase,
termsEnum, postings, maxPassages);
+      
       if (passages.length == 0) {
+        // no passages were returned, so ask for a default summary
         passages = getEmptyHighlight(field, bi, maxPassages);
       }
+
       if (passages.length > 0) {
-        // otherwise a null snippet (eg if field is missing
-        // entirely from the doc)
         highlights.put(doc, fieldFormatter.format(passages, content));
       }
+      
       lastLeaf = leaf;
     }
     



Mime
View raw message