lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sim...@apache.org
Subject svn commit: r1442590 - in /lucene/dev/trunk: dev-tools/maven/lucene/highlighter/ lucene/ lucene/highlighter/ lucene/highlighter/src/java/org/apache/lucene/search/highlight/ lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/ lucene/hi...
Date Tue, 05 Feb 2013 13:59:26 GMT
Author: simonw
Date: Tue Feb  5 13:59:26 2013
New Revision: 1442590

URL: http://svn.apache.org/viewvc?rev=1442590&view=rev
Log:
LUCENE-4728: Unknown and not explicitly mapped queries are now rewritten against the highlighting
IndexReader to obtain primitive queries before discarding the query entirely.

Modified:
    lucene/dev/trunk/dev-tools/maven/lucene/highlighter/pom.xml.template
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/highlighter/build.xml
    lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
    lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
    lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
    lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
    lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
    lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
    lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java

Modified: lucene/dev/trunk/dev-tools/maven/lucene/highlighter/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/dev-tools/maven/lucene/highlighter/pom.xml.template?rev=1442590&r1=1442589&r2=1442590&view=diff
==============================================================================
--- lucene/dev/trunk/dev-tools/maven/lucene/highlighter/pom.xml.template (original)
+++ lucene/dev/trunk/dev-tools/maven/lucene/highlighter/pom.xml.template Tue Feb  5 13:59:26
2013
@@ -61,6 +61,11 @@
       <artifactId>lucene-memory</artifactId>
       <version>${project.version}</version>
     </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>lucene-queries</artifactId>
+      <version>${project.version}</version>
+    </dependency>
   </dependencies>
   <build>
     <sourceDirectory>${module-path}/src/java</sourceDirectory>

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1442590&r1=1442589&r2=1442590&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Feb  5 13:59:26 2013
@@ -94,6 +94,12 @@ New Features
 * LUCENE-4723: Add AnalyzerFactoryTask to benchmark, and enable analyzer
   creation via the resulting factories using NewAnalyzerTask.  (Steve Rowe)
 
+* LUCENE-4728: Unknown and not explicitly mapped queries are now rewritten
+  against the highlighting IndexReader to obtain primitive queries before 
+  discarding the query entirely. WeightedSpanTermExtractor now builds a
+  MemoryIndex only once even if multiple fields are highlighted.
+  (Simon Willnauer)
+
 API Changes
 
 * LUCENE-4709: FacetResultNode no longer has a residue field. (Shai Erera)

Modified: lucene/dev/trunk/lucene/highlighter/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/build.xml?rev=1442590&r1=1442589&r2=1442590&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/build.xml (original)
+++ lucene/dev/trunk/lucene/highlighter/build.xml Tue Feb  5 13:59:26 2013
@@ -27,6 +27,7 @@
 
   <path id="classpath">
     <pathelement path="${memory.jar}"/>
+    <pathelement path="${queries.jar}"/>
     <path refid="base.classpath"/>
   </path>
 

Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java?rev=1442590&r1=1442589&r2=1442590&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
(original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
Tue Feb  5 13:59:26 2013
@@ -187,11 +187,9 @@ public class Highlighter
     ArrayList<TextFragment> docFrags = new ArrayList<TextFragment>();
     StringBuilder newText=new StringBuilder();
 
-      CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
-      OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
-      tokenStream.addAttribute(PositionIncrementAttribute.class);
-      tokenStream.reset();
-
+    CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
+    OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
+    tokenStream.reset();
     TextFragment currentFrag =  new TextFragment(newText,newText.length(), docFrags.size());
 
     if (fragmentScorer instanceof QueryScorer) {

Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java?rev=1442590&r1=1442589&r2=1442590&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
(original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
Tue Feb  5 13:59:26 2013
@@ -18,7 +18,7 @@ package org.apache.lucene.search.highlig
  */
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Collection;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -29,11 +29,18 @@ import java.util.TreeSet;
 
 import org.apache.lucene.analysis.CachingTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FilterAtomicReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermContext;
+import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.memory.MemoryIndex;
+import org.apache.lucene.queries.CommonTermsQuery;
 import org.apache.lucene.search.*;
 import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
 import org.apache.lucene.search.spans.SpanFirstQuery;
@@ -44,6 +51,8 @@ import org.apache.lucene.search.spans.Sp
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.search.spans.Spans;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.IOUtils;
+
 
 /**
  * Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether

@@ -53,12 +62,13 @@ public class WeightedSpanTermExtractor {
 
   private String fieldName;
   private TokenStream tokenStream;
-  private Map<String,AtomicReaderContext> readers = new HashMap<String,AtomicReaderContext>(10);

   private String defaultField;
   private boolean expandMultiTermQuery;
   private boolean cachedTokenStream;
   private boolean wrapToCaching = true;
   private int maxDocCharsToAnalyze;
+  private AtomicReader reader = null;
+
 
   public WeightedSpanTermExtractor() {
   }
@@ -69,18 +79,6 @@ public class WeightedSpanTermExtractor {
     }
   }
 
-  private void closeReaders() {
-    Collection<AtomicReaderContext> ctxSet = readers.values();
-
-    for (final AtomicReaderContext ctx : ctxSet) {
-      try {
-        ctx.reader().close();
-      } catch (IOException e) {
-        // alert?
-      }
-    }
-  }
-
   /**
    * Fills a <code>Map</code> with {@link WeightedSpanTerm}s using the terms from the supplied <code>Query</code>.
    * 
@@ -146,21 +144,14 @@ public class WeightedSpanTermExtractor {
       if (q != null) {
         extract(q, terms);
       }
+    } else if (query instanceof CommonTermsQuery) {
+      // specialized since rewriting would change the result query 
+      // this query is TermContext sensitive.
+      extractWeightedTerms(terms, query);
     } else if (query instanceof DisjunctionMaxQuery) {
       for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();)
{
         extract(iterator.next(), terms);
       }
-    } else if (query instanceof MultiTermQuery && expandMultiTermQuery) {
-      MultiTermQuery mtq = ((MultiTermQuery)query);
-      if(mtq.getRewriteMethod() != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) {
-        mtq = (MultiTermQuery) mtq.clone();
-        mtq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
-        query = mtq;
-      }
-      if (mtq.getField() != null) {
-        IndexReader ir = getLeafContextForField(mtq.getField()).reader();
-        extract(query.rewrite(ir), terms);
-      }
     } else if (query instanceof MultiPhraseQuery) {
       final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
       final List<Term[]> termArrays = mpq.getTermArrays();
@@ -210,12 +201,30 @@ public class WeightedSpanTermExtractor {
         sp.setBoost(query.getBoost());
         extractWeightedSpanTerms(terms, sp);
       }
+    } else {
+      Query origQuery = query;
+      if (query instanceof MultiTermQuery) {
+        if (!expandMultiTermQuery) {
+          return;
+        }
+        MultiTermQuery copy = (MultiTermQuery) query.clone();
+        copy.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+        origQuery = copy;
+      }
+      final IndexReader reader = getLeafContext().reader();
+      Query rewritten = origQuery.rewrite(reader);
+      if (rewritten != origQuery) {
+        // only rewrite once and then flatten again - the rewritten query could have a special treatment
+        // if this method is overridden in a subclass or above in the next recursion
+        extract(rewritten, terms);
+      } 
     }
     extractUnknownQuery(query, terms);
   }
 
   protected void extractUnknownQuery(Query query,
       Map<String, WeightedSpanTerm> terms) throws IOException {
+    
     // for sub-classing to extract custom queries
   }
 
@@ -249,7 +258,7 @@ public class WeightedSpanTermExtractor {
     final boolean mustRewriteQuery = mustRewriteQuery(spanQuery);
     if (mustRewriteQuery) {
       for (final String field : fieldNames) {
-        final SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(getLeafContextForField(field).reader());
+        final SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(getLeafContext().reader());
         queries.put(field, rewrittenQuery);
         rewrittenQuery.extractTerms(nonWeightedTerms);
       }
@@ -266,7 +275,7 @@ public class WeightedSpanTermExtractor {
       } else {
         q = spanQuery;
       }
-      AtomicReaderContext context = getLeafContextForField(field);
+      AtomicReaderContext context = getLeafContext();
       Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
       TreeSet<Term> extractedTerms = new TreeSet<Term>();
       q.extractTerms(extractedTerms);
@@ -338,23 +347,69 @@ public class WeightedSpanTermExtractor {
     return rv;
   }
 
-  protected AtomicReaderContext getLeafContextForField(String field) throws IOException {
-    if(wrapToCaching && !cachedTokenStream && !(tokenStream instanceof CachingTokenFilter))
{
-      tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
-      cachedTokenStream = true;
-    }
-    AtomicReaderContext context = readers.get(field);
-    if (context == null) {
-      MemoryIndex indexer = new MemoryIndex();
-      indexer.addField(field, new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
+  protected AtomicReaderContext getLeafContext() throws IOException {
+    if (reader == null) {
+      if(wrapToCaching && !(tokenStream instanceof CachingTokenFilter)) {
+        assert !cachedTokenStream;
+        tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
+        cachedTokenStream = true;
+      }
+      final MemoryIndex indexer = new MemoryIndex(true);
+      indexer.addField(DelegatingAtomicReader.FIELD_NAME, tokenStream);
       tokenStream.reset();
-      IndexSearcher searcher = indexer.createSearcher();
+      final IndexSearcher searcher = indexer.createSearcher();
       // MEM index has only atomic ctx
-      context = (AtomicReaderContext) searcher.getTopReaderContext();
-      readers.put(field, context);
+      reader = new DelegatingAtomicReader(((AtomicReaderContext)searcher.getTopReaderContext()).reader());
     }
+    return reader.getContext();
+  }
+  
+  /*
+   * This reader will just delegate every call to a single field in the wrapped
+   * AtomicReader. This way we only need to build this field once rather than
+   * N-Times
+   */
+  static final class DelegatingAtomicReader extends FilterAtomicReader {
+    private static final String FIELD_NAME = "shadowed_field";
 
-    return context;
+    DelegatingAtomicReader(AtomicReader in) {
+      super(in);
+    }
+    
+    @Override
+    public FieldInfos getFieldInfos() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public Fields fields() throws IOException {
+      return new FilterFields(super.fields()) {
+        @Override
+        public Terms terms(String field) throws IOException {
+          return super.terms(DelegatingAtomicReader.FIELD_NAME);
+        }
+
+        @Override
+        public Iterator<String> iterator() {
+          return Collections.singletonList(DelegatingAtomicReader.FIELD_NAME).iterator();
+        }
+
+        @Override
+        public int size() {
+          return 1;
+        }
+      };
+    }
+
+    @Override
+    public DocValues docValues(String field) throws IOException {
+      return super.docValues(FIELD_NAME);
+    }
+
+    @Override
+    public DocValues normValues(String field) throws IOException {
+      return super.normValues(FIELD_NAME);
+    }
   }
 
   /**
@@ -401,7 +456,7 @@ public class WeightedSpanTermExtractor {
     try {
       extract(query, terms);
     } finally {
-      closeReaders();
+      IOUtils.close(reader);
     }
 
     return terms;
@@ -449,8 +504,7 @@ public class WeightedSpanTermExtractor {
         weightedSpanTerm.weight *= idf;
       }
     } finally {
-
-      closeReaders();
+      IOUtils.close(reader);
     }
 
     return terms;

Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java?rev=1442590&r1=1442589&r2=1442590&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
(original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
Tue Feb  5 13:59:26 2013
@@ -28,9 +28,12 @@ import java.util.Set;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.CommonTermsQuery;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.DisjunctionMaxQuery;
+import org.apache.lucene.search.FilteredQuery;
 import org.apache.lucene.search.MultiTermQuery;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
@@ -92,8 +95,7 @@ public class FieldQuery {
         if( !clause.isProhibited() )
           flatten( clause.getQuery(), reader, flatQueries );
       }
-    }
-    else if( sourceQuery instanceof DisjunctionMaxQuery ){
+    } else if( sourceQuery instanceof DisjunctionMaxQuery ){
       DisjunctionMaxQuery dmq = (DisjunctionMaxQuery)sourceQuery;
       for( Query query : dmq ){
         flatten( query, reader, flatQueries );
@@ -103,12 +105,6 @@ public class FieldQuery {
       if( !flatQueries.contains( sourceQuery ) )
         flatQueries.add( sourceQuery );
     }
-    else if (sourceQuery instanceof MultiTermQuery && reader != null) {
-      MultiTermQuery copy = (MultiTermQuery) sourceQuery.clone();
-      copy.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS));
-      BooleanQuery mtqTerms = (BooleanQuery) copy.rewrite(reader);
-      flatten(mtqTerms, reader, flatQueries);
-    }
     else if( sourceQuery instanceof PhraseQuery ){
       if( !flatQueries.contains( sourceQuery ) ){
         PhraseQuery pq = (PhraseQuery)sourceQuery;
@@ -118,6 +114,31 @@ public class FieldQuery {
           flatQueries.add( new TermQuery( pq.getTerms()[0] ) );
         }
       }
+    } else if (sourceQuery instanceof ConstantScoreQuery) {
+      final Query q = ((ConstantScoreQuery) sourceQuery).getQuery();
+      if (q != null) {
+        flatten(q, reader, flatQueries);
+      }
+    } else if (sourceQuery instanceof FilteredQuery) {
+      final Query q = ((FilteredQuery) sourceQuery).getQuery();
+      if (q != null) {
+        flatten(q, reader, flatQueries);
+      }
+    } else if (reader != null){
+      Query query = sourceQuery;
+      if (sourceQuery instanceof MultiTermQuery) {
+        MultiTermQuery copy = (MultiTermQuery) sourceQuery.clone();
+        copy.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS));
+        query = copy;
+      }
+      Query rewritten = query.rewrite(reader);
+      if (rewritten != query) {
+        // only rewrite once and then flatten again - the rewritten query could have a special treatment
+        // if this method is overridden in a subclass.
+        flatten(rewritten, reader, flatQueries);
+        
+      } 
+      // if the query is already rewritten we discard it
     }
     // else discard queries
   }

Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=1442590&r1=1442589&r2=1442590&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
(original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
Tue Feb  5 13:59:26 2013
@@ -46,6 +46,7 @@ import org.apache.lucene.index.StoredDoc
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.CommonTermsQuery;
 import org.apache.lucene.search.*;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner;
@@ -114,6 +115,87 @@ public class HighlighterTest extends Bas
     }
   }
   
+  public void testHighlightingCommonTermsQuery() throws Exception {
+    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
+    CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 3);
+    query.add(new Term(FIELD_NAME, "this"));
+    query.add(new Term(FIELD_NAME, "long"));
+    query.add(new Term(FIELD_NAME, "very"));
+
+    searcher = new IndexSearcher(reader);
+    TopDocs hits = searcher.search(query, 10);
+    assertEquals(2, hits.totalHits);
+    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
+    Highlighter highlighter = new Highlighter(scorer);
+
+    StoredDocument doc = searcher.doc(hits.scoreDocs[0].doc);
+    String storedField = doc.get(FIELD_NAME);
+
+    TokenStream stream = TokenSources.getAnyTokenStream(searcher
+        .getIndexReader(), hits.scoreDocs[0].doc, FIELD_NAME, doc, analyzer);
+    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
+    highlighter.setTextFragmenter(fragmenter);
+    String fragment = highlighter.getBestFragment(stream, storedField);
+    assertEquals("Hello <B>this</B> is a piece of text that is <B>very</B>
<B>long</B> and contains too much preamble and the meat is really here which says
kennedy has been shot", fragment);
+    
+    doc = searcher.doc(hits.scoreDocs[1].doc);
+    storedField = doc.get(FIELD_NAME);
+
+    stream = TokenSources.getAnyTokenStream(searcher
+        .getIndexReader(), hits.scoreDocs[1].doc, FIELD_NAME, doc, analyzer);
+    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
+    fragment = highlighter.getBestFragment(stream, storedField);
+    assertEquals("<B>This</B> piece of text refers to Kennedy at the beginning
then has a longer piece of text that is <B>very</B>", fragment);
+  }
+  
+  public void testHighlightUnknowQueryAfterRewrite() throws IOException, InvalidTokenOffsetsException
{
+    Query query = new Query() {
+      
+      @Override
+      public Query rewrite(IndexReader reader) throws IOException {
+        CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 3);
+        query.add(new Term(FIELD_NAME, "this"));
+        query.add(new Term(FIELD_NAME, "long"));
+        query.add(new Term(FIELD_NAME, "very"));
+        return query;
+      }
+
+      @Override
+      public String toString(String field) {
+        return null;
+      }
+      
+    };
+    
+    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
+    
+    searcher = new IndexSearcher(reader);
+    TopDocs hits = searcher.search(query, 10);
+    assertEquals(2, hits.totalHits);
+    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
+    Highlighter highlighter = new Highlighter(scorer);
+
+    StoredDocument doc = searcher.doc(hits.scoreDocs[0].doc);
+    String storedField = doc.get(FIELD_NAME);
+
+    TokenStream stream = TokenSources.getAnyTokenStream(searcher
+        .getIndexReader(), hits.scoreDocs[0].doc, FIELD_NAME, doc, analyzer);
+    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
+    highlighter.setTextFragmenter(fragmenter);
+    String fragment = highlighter.getBestFragment(stream, storedField);
+    assertEquals("Hello <B>this</B> is a piece of text that is <B>very</B>
<B>long</B> and contains too much preamble and the meat is really here which says
kennedy has been shot", fragment);
+    
+    doc = searcher.doc(hits.scoreDocs[1].doc);
+    storedField = doc.get(FIELD_NAME);
+
+    stream = TokenSources.getAnyTokenStream(searcher
+        .getIndexReader(), hits.scoreDocs[1].doc, FIELD_NAME, doc, analyzer);
+    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
+    fragment = highlighter.getBestFragment(stream, storedField);
+    assertEquals("<B>This</B> piece of text refers to Kennedy at the beginning
then has a longer piece of text that is <B>very</B>", fragment);
+    
+  }
+  
   public void testHighlightingWithDefaultField() throws Exception {
 
     String s1 = "I call our world Flatland, not because we call it so,";
@@ -150,7 +232,7 @@ public class HighlighterTest extends Bas
         "Query in a named field does not result in highlighting when that field isn't in
the query",
         s1, highlightField(q, FIELD_NAME, s1));
   }
-
+  
   /**
    * This method intended for use with <tt>testHighlightingWithDefaultField()</tt>
    */
@@ -603,7 +685,7 @@ public class HighlighterTest extends Bas
     // Not sure we can assert anything here - just running to check we don't
     // throw any exceptions
   }
-  
+
   public void testSpanHighlighting() throws Exception {
     Query query1 = new SpanNearQuery(new SpanQuery[] {
         new SpanTermQuery(new Term(FIELD_NAME, "wordx")),

Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java?rev=1442590&r1=1442589&r2=1442590&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
(original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
Tue Feb  5 13:59:26 2013
@@ -18,6 +18,8 @@ package org.apache.lucene.search.vectorh
 import java.io.IOException;
 
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenFilter;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
@@ -26,7 +28,13 @@ import org.apache.lucene.index.Directory
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.CommonTermsQuery;
+import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
+import org.apache.lucene.search.highlight.TokenSources;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 
@@ -62,4 +70,47 @@ public class FastVectorHighlighterTest e
     writer.close();
     dir.close();
   }
+  
+  public void testCommonTermsQueryHighlightTest() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT,
 new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)));
+    FieldType type = new FieldType(TextField.TYPE_STORED);
+    type.setStoreTermVectorOffsets(true);
+    type.setStoreTermVectorPositions(true);
+    type.setStoreTermVectors(true);
+    type.freeze();
+    String[] texts = {
+        "Hello this is a piece of text that is very long and contains too much preamble and
the meat is really here which says kennedy has been shot",
+        "This piece of text refers to Kennedy at the beginning then has a longer piece of
text that is very long in the middle and finally ends with another reference to Kennedy",
+        "JFK has been shot", "John Kennedy has been shot",
+        "This text has a typo in referring to Keneddy",
+        "wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", "y z x y z a b", "lets
is a the lets is a the lets is a the lets" };
+    for (int i = 0; i < texts.length; i++) {
+      Document doc = new Document();
+      Field field = new Field("field", texts[i], type);
+      doc.add(field);
+      writer.addDocument(doc);
+    }
+    CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 2);
+    query.add(new Term("field", "text"));
+    query.add(new Term("field", "long"));
+    query.add(new Term("field", "very"));
+   
+    FastVectorHighlighter highlighter = new FastVectorHighlighter();
+    IndexReader reader = DirectoryReader.open(writer, true);
+    IndexSearcher searcher = new IndexSearcher(reader);
+    TopDocs hits = searcher.search(query, 10);
+    assertEquals(2, hits.totalHits);
+    FieldQuery fieldQuery  = highlighter.getFieldQuery(query, reader);
+    String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[0].doc,
"field", 1000, 1);
+    assertEquals("This piece of <b>text</b> refers to Kennedy at the beginning
then has a longer piece of <b>text</b> that is <b>very</b> <b>long</b>
in the middle and finally ends with another reference to Kennedy", bestFragments[0]);
+
+    fieldQuery  = highlighter.getFieldQuery(query, reader);
+    bestFragments = highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[1].doc,
"field", 1000, 1);
+    assertEquals("Hello this is a piece of <b>text</b> that is <b>very</b>
<b>long</b> and contains too much preamble and the meat is really here which says
kennedy has been shot", bestFragments[0]);
+
+    reader.close();
+    writer.close();
+    dir.close();
+  }
 }

Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java?rev=1442590&r1=1442589&r2=1442590&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
(original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
Tue Feb  5 13:59:26 2013
@@ -23,8 +23,13 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.FilteredQuery;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
@@ -35,6 +40,7 @@ import org.apache.lucene.search.TermRang
 import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap;
 import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 
 public class FieldQueryTest extends AbstractTestCase {
@@ -905,4 +911,40 @@ public class FieldQueryTest extends Abst
     assertNotNull (fq.searchPhrase(F, phraseCandidate));
   }
   
+  public void testStopRewrite() throws Exception {
+    Query q = new Query() {
+
+      @Override
+      public String toString(String field) {
+        return "DummyQuery";
+      }
+      
+    };
+    make1d1fIndex( "a" );
+    assertNotNull(reader);
+    new FieldQuery(q, reader, true, true );
+  }
+  
+  public void testFlattenFilteredQuery() throws Exception {
+    Query query = new FilteredQuery(pqF( "A" ), new Filter() {
+      @Override
+      public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs)
+          throws IOException {
+        return null;
+      }
+    });
+    FieldQuery fq = new FieldQuery( query, true, true );
+    Set<Query> flatQueries = new HashSet<Query>();
+    fq.flatten( query, reader, flatQueries );
+    assertCollectionQueries( flatQueries, tq( "A" ) );
+  }
+  
+  public void testFlattenConstantScoreQuery() throws Exception {
+    Query query = new ConstantScoreQuery(pqF( "A" ));
+    FieldQuery fq = new FieldQuery( query, true, true );
+    Set<Query> flatQueries = new HashSet<Query>();
+    fq.flatten( query, reader, flatQueries );
+    assertCollectionQueries( flatQueries, tq( "A" ) );
+  }
+  
 }

Modified: lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1442590&r1=1442589&r2=1442590&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
(original)
+++ lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
Tue Feb  5 13:59:26 2013
@@ -465,7 +465,9 @@ public class MemoryIndex {
       throw new RuntimeException(e);
     } finally {
       try {
-        if (stream != null) stream.close();
+        if (stream != null) {
+          stream.close();
+        }
       } catch (IOException e2) {
         throw new RuntimeException(e2);
       }



Mime
View raw message