lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From markrmil...@apache.org
Subject svn commit: r800796 - in /lucene/java/trunk/contrib: ./ benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ highlighter/src/java/org/apache/lucene/search/highlight/ highlighter/src/test/org/apache/lucene/search/highlight/
Date Tue, 04 Aug 2009 13:56:12 GMT
Author: markrmiller
Date: Tue Aug  4 13:56:11 2009
New Revision: 800796

URL: http://svn.apache.org/viewvc?rev=800796&view=rev
Log:
LUCENE-1685: The position aware SpanScorer has become the default scorer for Highlighting. The SpanScorer implementation has replaced QueryScorer and the old term highlighting QueryScorer has been renamed to QueryTermScorer. Multi-term queries are also now expanded by default. If you were previously rewriting the query for multi-term query highlighting, you should no longer do that (unless you switch to using QueryTermScorer). The SpanScorer API (now QueryScorer) has also been improved to more closely match the API of the previous QueryScorer implementation.

Added:
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java
Removed:
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SpanScorer.java
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/spanscorer.html
Modified:
    lucene/java/trunk/contrib/CHANGES.txt
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Scorer.java
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/package.html
    lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java

Modified: lucene/java/trunk/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/CHANGES.txt?rev=800796&r1=800795&r2=800796&view=diff
==============================================================================
--- lucene/java/trunk/contrib/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/CHANGES.txt Tue Aug  4 13:56:11 2009
@@ -120,7 +120,16 @@
     "(jo* -john) smyth~". (Mark Harwood via Mark Miller)
     
 14. Added web-based demo of functionality in contrib's XML Query Parser
-    packaged as War file (Mark Harwood)    
+    packaged as War file (Mark Harwood)
+
+15. LUCENE-1685: The position aware SpanScorer has become the default scorer
+    for Highlighting. The SpanScorer implementation has replaced QueryScorer
+    and the old term highlighting QueryScorer has been renamed to 
+    QueryTermScorer. Multi-term queries are also now expanded by default. If
+    you were previously rewriting the query for multi-term query highlighting,
+    you should no longer do that (unless you switch to using QueryTermScorer).
+    The SpanScorer API (now QueryScorer) has also been improved to more closely
+    match the API of the previous QueryScorer implementation.  (Mark Miller)  
 
 
 Optimizations

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java?rev=800796&r1=800795&r2=800796&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java Tue Aug  4 13:56:11 2009
@@ -38,7 +38,7 @@
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.highlight.Highlighter;
-import org.apache.lucene.search.highlight.QueryScorer;
+import org.apache.lucene.search.highlight.QueryTermScorer;
 import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
 import org.apache.lucene.search.highlight.TextFragment;
 import org.apache.lucene.search.highlight.TokenSources;
@@ -242,7 +242,7 @@
   }
 
   protected Highlighter getHighlighter(Query q){
-    return new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
+    return new Highlighter(new SimpleHTMLFormatter(), new QueryTermScorer(q));
   }
 
   /**

Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java?rev=800796&r1=800795&r2=800796&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java Tue Aug  4 13:56:11 2009
@@ -223,7 +223,10 @@
 	    tokenStream.reset();
 	    
 		TextFragment currentFrag =	new TextFragment(newText,newText.length(), docFrags.size());
-		fragmentScorer.init(tokenStream);
+		TokenStream newStream = fragmentScorer.init(tokenStream);
+		if(newStream != null) {
+		  tokenStream = newStream;
+		}
 		fragmentScorer.startFragment(currentFrag);
 		docFrags.add(currentFrag);
 

Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java?rev=800796&r1=800795&r2=800796&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java Tue Aug  4 13:56:11 2009
@@ -1,161 +1,227 @@
 package org.apache.lucene.search.highlight;
 
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
+import java.io.IOException;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
 
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.Query;
 
 /**
  * {@link Scorer} implementation which scores text fragments by the number of
- * unique query terms found. This class uses the {@link QueryTermExtractor}
- * class to process determine the query terms and their boosts to be used.
+ * unique query terms found. This class converts appropriate Querys to
+ * SpanQuerys and attempts to score only those terms that participated in
+ * generating the 'hit' on the document.
  */
-// TODO: provide option to boost score of fragments near beginning of document
-// based on fragment.getFragNum()
 public class QueryScorer implements Scorer {
-  
-  TextFragment currentTextFragment = null;
-  HashSet uniqueTermsInFragment;
-
-  float totalScore = 0;
-  float maxTermWeight = 0;
-  private HashMap termsToFind;
-
+  private float totalScore;
+  private Set foundTerms;
+  private Map fieldWeightedSpanTerms;
+  private float maxTermWeight;
+  private int position = -1;
+  private String defaultField;
   private TermAttribute termAtt;
+  private PositionIncrementAttribute posIncAtt;
+  private boolean expandMultiTermQuery = true;
+  private Query query;
+  private String field;
+  private IndexReader reader;
+  private boolean skipInitExtractor;
 
   /**
+   * @param query Query to use for highlighting
    * 
-   * @param query a Lucene query (ideally rewritten using query.rewrite before
-   *        being passed to this class and the searcher)
+   * @throws IOException
    */
   public QueryScorer(Query query) {
-    this(QueryTermExtractor.getTerms(query));
+    init(query, null, null, true);
   }
 
   /**
-   * 
-   * @param query a Lucene query (ideally rewritten using query.rewrite before
-   *        being passed to this class and the searcher)
-   * @param fieldName the Field name which is used to match Query terms
+   * @param query Query to use for highlighting
+   * @param field Field to highlight - pass null to ignore fields
+   * @throws IOException
    */
-  public QueryScorer(Query query, String fieldName) {
-    this(QueryTermExtractor.getTerms(query, false, fieldName));
+  public QueryScorer(Query query, String field) {
+    init(query, field, null, true);
   }
 
   /**
+   * @param query Query to use for highlighting
+   * @param field Field to highlight - pass null to ignore fields
    * 
-   * @param query a Lucene query (ideally rewritten using query.rewrite before
-   *        being passed to this class and the searcher)
-   * @param reader used to compute IDF which can be used to a) score selected
-   *        fragments better b) use graded highlights eg set font color
-   *        intensity
-   * @param fieldName the field on which Inverse Document Frequency (IDF)
-   *        calculations are based
+   * @param reader
+   * @throws IOException
+   */
+  public QueryScorer(Query query, IndexReader reader, String field) {
+    init(query, field, reader, true);
+  }
+
+  /**
+   * As above, but with ability to pass in an <tt>IndexReader</tt>
    */
-  public QueryScorer(Query query, IndexReader reader, String fieldName) {
-    this(QueryTermExtractor.getIdfWeightedTerms(query, reader, fieldName));
+  public QueryScorer(Query query, IndexReader reader, String field, String defaultField)
+    throws IOException {
+    this.defaultField = defaultField.intern();
+    init(query, field, reader, true);
   }
 
-  public QueryScorer(WeightedTerm[] weightedTerms) {
-    termsToFind = new HashMap();
+  /**
+   * @param defaultField - The default field for queries with the field name unspecified
+   */
+  public QueryScorer(Query query, String field, String defaultField) {
+    this.defaultField = defaultField.intern();
+    init(query, field, null, true);
+  }
+
+  /**
+   * @param weightedTerms
+   */
+  public QueryScorer(WeightedSpanTerm[] weightedTerms) {
+    this.fieldWeightedSpanTerms = new HashMap(weightedTerms.length);
+
     for (int i = 0; i < weightedTerms.length; i++) {
-      WeightedTerm existingTerm = (WeightedTerm) termsToFind
-          .get(weightedTerms[i].term);
-      if ((existingTerm == null)
-          || (existingTerm.weight < weightedTerms[i].weight)) {
-        // if a term is defined more than once, always use the highest scoring
-        // weight
-        termsToFind.put(weightedTerms[i].term, weightedTerms[i]);
+      WeightedSpanTerm existingTerm = (WeightedSpanTerm) fieldWeightedSpanTerms.get(weightedTerms[i].term);
+
+      if ((existingTerm == null) ||
+            (existingTerm.weight < weightedTerms[i].weight)) {
+        // if a term is defined more than once, always use the highest
+        // scoring weight
+        fieldWeightedSpanTerms.put(weightedTerms[i].term, weightedTerms[i]);
         maxTermWeight = Math.max(maxTermWeight, weightedTerms[i].getWeight());
       }
     }
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream)
-   */
-  public void init(TokenStream tokenStream) {
-    termAtt = (TermAttribute) tokenStream.getAttribute(TermAttribute.class);
+    skipInitExtractor = true;
   }
 
   /*
    * (non-Javadoc)
-   * 
-   * @see
-   * org.apache.lucene.search.highlight.FragmentScorer#startFragment(org.apache
-   * .lucene.search.highlight.TextFragment)
+   *
+   * @see org.apache.lucene.search.highlight.Scorer#getFragmentScore()
    */
-  public void startFragment(TextFragment newFragment) {
-    uniqueTermsInFragment = new HashSet();
-    currentTextFragment = newFragment;
-    totalScore = 0;
-
+  public float getFragmentScore() {
+    return totalScore;
   }
 
+  /**
+   *
+   * @return The highest weighted term (useful for passing to
+   *         GradientFormatter to set top end of coloring scale.
+   */
+  public float getMaxTermWeight() {
+    return maxTermWeight;
+  }
 
-  /* (non-Javadoc)
-   * @see org.apache.lucene.search.highlight.Scorer#getTokenScore()
+  /*
+   * (non-Javadoc)
+   *
+   * @see org.apache.lucene.search.highlight.Scorer#getTokenScore(org.apache.lucene.analysis.Token,
+   *      int)
    */
   public float getTokenScore() {
+    position += posIncAtt.getPositionIncrement();
     String termText = termAtt.term();
 
-    WeightedTerm queryTerm = (WeightedTerm) termsToFind.get(termText);
-    if (queryTerm == null) {
-      // not a query term - return
+    WeightedSpanTerm weightedSpanTerm;
+
+    if ((weightedSpanTerm = (WeightedSpanTerm) fieldWeightedSpanTerms.get(
+              termText)) == null) {
+      return 0;
+    }
+
+    if (weightedSpanTerm.positionSensitive &&
+          !weightedSpanTerm.checkPosition(position)) {
       return 0;
     }
+
+    float score = weightedSpanTerm.getWeight();
+
     // found a query term - is it unique in this doc?
-    if (!uniqueTermsInFragment.contains(termText)) {
-      totalScore += queryTerm.getWeight();
-      uniqueTermsInFragment.add(termText);
+    if (!foundTerms.contains(termText)) {
+      totalScore += score;
+      foundTerms.add(termText);
     }
-    return queryTerm.getWeight();
-  }
 
+    return score;
+  }
 
-  /* (non-Javadoc)
-   * @see org.apache.lucene.search.highlight.Scorer#getFragmentScore()
+  public TokenStream init(TokenStream tokenStream) throws IOException {
+    position = -1;
+    termAtt = (TermAttribute) tokenStream.getAttribute(TermAttribute.class);
+    posIncAtt = (PositionIncrementAttribute) tokenStream.getAttribute(PositionIncrementAttribute.class);
+    if(!skipInitExtractor) {
+      if(fieldWeightedSpanTerms != null) {
+        fieldWeightedSpanTerms.clear();
+      }
+      return initExtractor(tokenStream);
+    }
+    return null;
+  }
+  
+  /**
+   * Retrieve the WeightedSpanTerm for the specified token. Useful for passing
+   * Span information to a Fragmenter.
+   *
+   * @param token
+   * @return WeightedSpanTerm for token
    */
-  public float getFragmentScore() {
-    return totalScore;
+  public WeightedSpanTerm getWeightedSpanTerm(String token) {
+    return (WeightedSpanTerm) fieldWeightedSpanTerms.get(token);
+  }
+
+  /**
+   * @param query
+   * @param field
+   * @param tokenStream
+   * @param reader
+   * @throws IOException
+   */
+  private void init(Query query, String field, IndexReader reader, boolean expandMultiTermQuery) {
+    this.reader = reader;
+    this.expandMultiTermQuery = expandMultiTermQuery;
+    this.query = query;
+    this.field = field;
+  }
+  
+  private TokenStream initExtractor(TokenStream tokenStream) throws IOException {
+    WeightedSpanTermExtractor qse = defaultField == null ? new WeightedSpanTermExtractor()
+        : new WeightedSpanTermExtractor(defaultField);
+
+    qse.setExpandMultiTermQuery(expandMultiTermQuery);
+    if (reader == null) {
+      this.fieldWeightedSpanTerms = qse.getWeightedSpanTerms(query,
+          tokenStream, field);
+    } else {
+      this.fieldWeightedSpanTerms = qse.getWeightedSpanTermsWithScores(query,
+          tokenStream, field, reader);
+    }
+    if(qse.isCachedTokenStream()) {
+      return qse.getTokenStream();
+    }
+    
+    return null;
   }
 
   /*
    * (non-Javadoc)
-   * 
-   * @see
-   * org.apache.lucene.search.highlight.FragmentScorer#allFragmentsProcessed()
+   *
+   * @see org.apache.lucene.search.highlight.Scorer#startFragment(org.apache.lucene.search.highlight.TextFragment)
    */
-  public void allFragmentsProcessed() {
-    // this class has no special operations to perform at end of processing
+  public void startFragment(TextFragment newFragment) {
+    foundTerms = new HashSet();
+    totalScore = 0;
+  }
+  
+  public boolean isExpandMultiTermQuery() {
+    return expandMultiTermQuery;
   }
 
-  /**
-   * 
-   * @return The highest weighted term (useful for passing to GradientFormatter
-   *         to set top end of coloring scale.
-   */
-  public float getMaxTermWeight() {
-    return maxTermWeight;
+  public void setExpandMultiTermQuery(boolean expandMultiTermQuery) {
+    this.expandMultiTermQuery = expandMultiTermQuery;
   }
 }

Added: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java?rev=800796&view=auto
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java (added)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java Tue Aug  4 13:56:11 2009
@@ -0,0 +1,162 @@
+package org.apache.lucene.search.highlight;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.HashMap;
+import java.util.HashSet;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.Query;
+
+/**
+ * {@link Scorer} implementation which scores text fragments by the number of
+ * unique query terms found. This class uses the {@link QueryTermExtractor}
+ * class to process determine the query terms and their boosts to be used.
+ */
+// TODO: provide option to boost score of fragments near beginning of document
+// based on fragment.getFragNum()
+public class QueryTermScorer implements Scorer {
+  
+  TextFragment currentTextFragment = null;
+  HashSet uniqueTermsInFragment;
+
+  float totalScore = 0;
+  float maxTermWeight = 0;
+  private HashMap termsToFind;
+
+  private TermAttribute termAtt;
+
+  /**
+   * 
+   * @param query a Lucene query (ideally rewritten using query.rewrite before
+   *        being passed to this class and the searcher)
+   */
+  public QueryTermScorer(Query query) {
+    this(QueryTermExtractor.getTerms(query));
+  }
+
+  /**
+   * 
+   * @param query a Lucene query (ideally rewritten using query.rewrite before
+   *        being passed to this class and the searcher)
+   * @param fieldName the Field name which is used to match Query terms
+   */
+  public QueryTermScorer(Query query, String fieldName) {
+    this(QueryTermExtractor.getTerms(query, false, fieldName));
+  }
+
+  /**
+   * 
+   * @param query a Lucene query (ideally rewritten using query.rewrite before
+   *        being passed to this class and the searcher)
+   * @param reader used to compute IDF which can be used to a) score selected
+   *        fragments better b) use graded highlights eg set font color
+   *        intensity
+   * @param fieldName the field on which Inverse Document Frequency (IDF)
+   *        calculations are based
+   */
+  public QueryTermScorer(Query query, IndexReader reader, String fieldName) {
+    this(QueryTermExtractor.getIdfWeightedTerms(query, reader, fieldName));
+  }
+
+  public QueryTermScorer(WeightedTerm[] weightedTerms) {
+    termsToFind = new HashMap();
+    for (int i = 0; i < weightedTerms.length; i++) {
+      WeightedTerm existingTerm = (WeightedTerm) termsToFind
+          .get(weightedTerms[i].term);
+      if ((existingTerm == null)
+          || (existingTerm.weight < weightedTerms[i].weight)) {
+        // if a term is defined more than once, always use the highest scoring
+        // weight
+        termsToFind.put(weightedTerms[i].term, weightedTerms[i]);
+        maxTermWeight = Math.max(maxTermWeight, weightedTerms[i].getWeight());
+      }
+    }
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream)
+   */
+  public TokenStream init(TokenStream tokenStream) {
+    termAtt = (TermAttribute) tokenStream.getAttribute(TermAttribute.class);
+    return null;
+  }
+
+  /*
+   * (non-Javadoc)
+   * 
+   * @see
+   * org.apache.lucene.search.highlight.FragmentScorer#startFragment(org.apache
+   * .lucene.search.highlight.TextFragment)
+   */
+  public void startFragment(TextFragment newFragment) {
+    uniqueTermsInFragment = new HashSet();
+    currentTextFragment = newFragment;
+    totalScore = 0;
+
+  }
+
+
+  /* (non-Javadoc)
+   * @see org.apache.lucene.search.highlight.Scorer#getTokenScore()
+   */
+  public float getTokenScore() {
+    String termText = termAtt.term();
+
+    WeightedTerm queryTerm = (WeightedTerm) termsToFind.get(termText);
+    if (queryTerm == null) {
+      // not a query term - return
+      return 0;
+    }
+    // found a query term - is it unique in this doc?
+    if (!uniqueTermsInFragment.contains(termText)) {
+      totalScore += queryTerm.getWeight();
+      uniqueTermsInFragment.add(termText);
+    }
+    return queryTerm.getWeight();
+  }
+
+
+  /* (non-Javadoc)
+   * @see org.apache.lucene.search.highlight.Scorer#getFragmentScore()
+   */
+  public float getFragmentScore() {
+    return totalScore;
+  }
+
+  /*
+   * (non-Javadoc)
+   * 
+   * @see
+   * org.apache.lucene.search.highlight.FragmentScorer#allFragmentsProcessed()
+   */
+  public void allFragmentsProcessed() {
+    // this class has no special operations to perform at end of processing
+  }
+
+  /**
+   * 
+   * @return The highest weighted term (useful for passing to GradientFormatter
+   *         to set top end of coloring scale.
+   */
+  public float getMaxTermWeight() {
+    return maxTermWeight;
+  }
+}

Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Scorer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Scorer.java?rev=800796&r1=800795&r2=800796&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Scorer.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Scorer.java Tue Aug  4 13:56:11 2009
@@ -17,6 +17,8 @@
  * limitations under the License.
  */
 
+import java.io.IOException;
+
 import org.apache.lucene.analysis.TokenStream;
 
 /**
@@ -30,8 +32,9 @@
    * getTokenScore().
    * 
    * @param tokenStream
+   * @throws IOException 
    */
-  public void init(TokenStream tokenStream);
+  public TokenStream init(TokenStream tokenStream) throws IOException;
 
   /**
    * called when a new fragment is started for consideration

Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java?rev=800796&r1=800795&r2=800796&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java Tue Aug  4 13:56:11 2009
@@ -34,7 +34,7 @@
   private int fragmentSize;
   private int currentNumFrags;
   private int position = -1;
-  private SpanScorer spanScorer;
+  private QueryScorer queryScorer;
   private int waitForPos = -1;
   private int textSize;
   private TermAttribute termAtt;
@@ -42,19 +42,19 @@
   private OffsetAttribute offsetAtt;
 
   /**
-   * @param spanscorer SpanScorer that was used to score hits
+   * @param queryScorer QueryScorer that was used to score hits
    */
-  public SimpleSpanFragmenter(SpanScorer spanscorer) {
-    this(spanscorer, DEFAULT_FRAGMENT_SIZE);
+  public SimpleSpanFragmenter(QueryScorer queryScorer) {
+    this(queryScorer, DEFAULT_FRAGMENT_SIZE);
   }
 
   /**
-   * @param spanscorer SpanScorer that was used to score hits
+   * @param queryScorer QueryScorer that was used to score hits
    * @param fragmentSize size in bytes of each fragment
    */
-  public SimpleSpanFragmenter(SpanScorer spanscorer, int fragmentSize) {
+  public SimpleSpanFragmenter(QueryScorer queryScorer, int fragmentSize) {
     this.fragmentSize = fragmentSize;
-    this.spanScorer = spanscorer;
+    this.queryScorer = queryScorer;
   }
   
   /* (non-Javadoc)
@@ -69,7 +69,7 @@
       return false;
     }
 
-    WeightedSpanTerm wSpanTerm = spanScorer.getWeightedSpanTerm(termAtt.term());
+    WeightedSpanTerm wSpanTerm = queryScorer.getWeightedSpanTerm(termAtt.term());
 
     if (wSpanTerm != null) {
       List positionSpans = wSpanTerm.getPositionSpans();

Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java?rev=800796&r1=800795&r2=800796&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java Tue Aug  4 13:56:11 2009
@@ -57,11 +57,11 @@
 public class WeightedSpanTermExtractor {
 
   private String fieldName;
-  private CachingTokenFilter cachedTokenFilter;
+  private TokenStream tokenStream;
   private Map readers = new HashMap(10); // Map<String, IndexReader>
   private String defaultField;
-  private boolean highlightCnstScrRngQuery;
   private boolean expandMultiTermQuery;
+  private boolean cachedTokenStream;
 
   public WeightedSpanTermExtractor() {
   }
@@ -131,7 +131,7 @@
       for (Iterator iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
         extract((Query) iterator.next(), terms);
       }
-    } else if (query instanceof MultiTermQuery && (highlightCnstScrRngQuery || expandMultiTermQuery)) {
+    } else if (query instanceof MultiTermQuery && expandMultiTermQuery) {
       MultiTermQuery mtq = ((MultiTermQuery)query);
       if(mtq.getRewriteMethod() != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) {
         mtq = copyMultiTermQuery(mtq);
@@ -240,8 +240,7 @@
       while (spans.next()) {
         spanPositions.add(new PositionSpan(spans.start(), spans.end() - 1));
       }
-
-      cachedTokenFilter.reset();
+      
     }
 
     if (spanPositions.size() == 0) {
@@ -301,15 +300,21 @@
     return rv;
   }
 
-  private IndexReader getReaderForField(String field) {
+  private IndexReader getReaderForField(String field) throws IOException {
+    if(!cachedTokenStream && !(tokenStream instanceof CachingTokenFilter)) {
+      tokenStream = new CachingTokenFilter(tokenStream);
+      cachedTokenStream = true;
+    }
     IndexReader reader = (IndexReader) readers.get(field);
     if (reader == null) {
       MemoryIndex indexer = new MemoryIndex();
-      indexer.addField(field, cachedTokenFilter);
+      indexer.addField(field, tokenStream);
+      tokenStream.reset();
       IndexSearcher searcher = indexer.createSearcher();
       reader = searcher.getIndexReader();
       readers.put(field, reader);
     }
+
     return reader;
   }
 
@@ -328,7 +333,7 @@
   public Map getWeightedSpanTerms(Query query, CachingTokenFilter cachingTokenFilter)
       throws IOException {
     this.fieldName = null;
-    this.cachedTokenFilter = cachingTokenFilter;
+    this.tokenStream = cachingTokenFilter;
 
     Map terms = new PositionCheckingMap();
     try {
@@ -354,14 +359,14 @@
    * @return
    * @throws IOException
    */
-  public Map getWeightedSpanTerms(Query query, CachingTokenFilter cachingTokenFilter,
+  public Map getWeightedSpanTerms(Query query, TokenStream tokenStream,
       String fieldName) throws IOException {
     if (fieldName != null) {
       this.fieldName = fieldName.intern();
     }
 
     Map terms = new PositionCheckingMap();
-    this.cachedTokenFilter = cachingTokenFilter;
+    this.tokenStream = tokenStream;
     try {
       extract(query, terms);
     } finally {
@@ -391,7 +396,7 @@
   public Map getWeightedSpanTermsWithScores(Query query, TokenStream tokenStream, String fieldName,
       IndexReader reader) throws IOException {
     this.fieldName = fieldName;
-    this.cachedTokenFilter = new CachingTokenFilter(tokenStream);
+    this.tokenStream = tokenStream;
 
     Map terms = new PositionCheckingMap();
     extract(query, terms);
@@ -419,23 +424,6 @@
 
     return terms;
   }
-
-  /**
-   * @deprecated {@link ConstantScoreRangeQuery} is deprecated. Use
-   *             getExpandMultiTermQuery instead.
-   */
-  public boolean isHighlightCnstScrRngQuery() {
-    return highlightCnstScrRngQuery;
-  }
-  
-  /**
-   * @param highlightCnstScrRngQuery
-   * @deprecated {@link ConstantScoreRangeQuery} is deprecated. Use the
-   *             setExpandMultiTermQuery option.
-   */
-  public void setHighlightCnstScrRngQuery(boolean highlightCnstScrRngQuery) {
-    this.highlightCnstScrRngQuery = highlightCnstScrRngQuery;
-  }
   
   /**
    * This class makes sure that if both position sensitive and insensitive
@@ -495,4 +483,12 @@
   public void setExpandMultiTermQuery(boolean expandMultiTermQuery) {
     this.expandMultiTermQuery = expandMultiTermQuery;
   }
+  
+  public boolean isCachedTokenStream() {
+    return cachedTokenStream;
+  }
+  
+  public TokenStream getTokenStream() {
+    return tokenStream;
+  }
 }

Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/package.html?rev=800796&r1=800795&r2=800796&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/package.html (original)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/package.html Tue Aug  4 13:56:11 2009
@@ -5,7 +5,7 @@
 typically used to highlight search terms in the text of results pages.
 The Highlighter class is the central component and can be used to extract the
 most interesting sections of a piece of text and highlight them, with the help of
-Fragmenter, FragmentScorer, Formatter classes.
+Fragmenter, fragment Scorer, and Formatter classes.
 
 <h2>Example Usage</h2>
 
@@ -14,14 +14,16 @@
   IndexSearcher searcher = new IndexSearcher(directory);
   QueryParser parser = new QueryParser("notv", analyzer);
   Query query = parser.parse("million");
-  //query = query.rewrite(reader); //required to expand search terms
-  Hits hits = searcher.search(query);
+
+  TopDocs hits = searcher.search(query, 10);
 
   SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
   Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
   for (int i = 0; i < 10; i++) {
-    String text = hits.doc(i).get("notv");
-    TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), hits.id(i), "notv", analyzer);
+    int id = hits.scoreDocs[i].doc;
+    Document doc = searcher.doc(id);
+    String text = doc.get("notv");
+    TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "notv", analyzer);
     TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "...");
     for (int j = 0; j < frag.length; j++) {
       if ((frag[j] != null) && (frag[j].getScore() > 0)) {
@@ -29,8 +31,8 @@
       }
     }
     //Term vector
-    text = hits.doc(i).get("tv");
-    tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), hits.id(i), "tv", analyzer);
+    text = doc.get("tv");
+    tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), hits.scoreDocs[i].doc, "tv", analyzer);
     frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
     for (int j = 0; j < frag.length; j++) {
       if ((frag[j] != null) && (frag[j].getScore() > 0)) {

Modified: lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=800796&r1=800795&r2=800796&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java Tue Aug  4 13:56:11 2009
@@ -118,8 +118,9 @@
     query = qp.parse("\"very long\"");
     searcher = new IndexSearcher(ramDir, false);
     TopDocs hits = searcher.search(query, 10);
-
-    Highlighter highlighter = new Highlighter(null);
+    
+    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
+    Highlighter highlighter = new Highlighter(scorer);
 
 
     for (int i = 0; i < hits.scoreDocs.length; i++) {
@@ -128,14 +129,12 @@
 
       TokenStream stream = TokenSources.getAnyTokenStream(searcher
           .getIndexReader(), hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
-      CachingTokenFilter ctf = new CachingTokenFilter(stream);
-      SpanScorer scorer = new SpanScorer(query, FIELD_NAME, ctf);
-     // ctf.reset();
+
       Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
-      highlighter.setFragmentScorer(scorer);
+
       highlighter.setTextFragmenter(fragmenter);
 
-      String fragment = highlighter.getBestFragment(ctf, storedField);
+      String fragment = highlighter.getBestFragment(stream, storedField);
 
       System.out.println(fragment);
     }
@@ -181,10 +180,10 @@
         fieldName, new StringReader(text)));
     // Assuming "<B>", "</B>" used to highlight
     SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
-    Highlighter highlighter = new Highlighter(formatter, new SpanScorer(query, fieldName,
-        tokenStream, FIELD_NAME));
+    QueryScorer scorer = new QueryScorer(query, fieldName, FIELD_NAME);
+    Highlighter highlighter = new Highlighter(formatter, scorer);
     highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));
-    tokenStream.reset();
+
     String rv = highlighter.getBestFragments(tokenStream, text, 1, "(FIELD TEXT TRUNCATED)");
     return rv.length() == 0 ? text : rv;
   }
@@ -194,13 +193,14 @@
 
     int maxNumFragmentsRequired = 2;
 
+    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
+    Highlighter highlighter = new Highlighter(scorer);
+    
     for (int i = 0; i < hits.length(); i++) {
       String text = hits.doc(i).get(FIELD_NAME);
-      CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME,
-          new StringReader(text)));
-      Highlighter highlighter = new Highlighter(new SpanScorer(query, FIELD_NAME, tokenStream));
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME,
+          new StringReader(text));
       highlighter.setTextFragmenter(new SimpleFragmenter(40));
-      tokenStream.reset();
 
       String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
           "...");
@@ -225,9 +225,10 @@
     Analyzer analyzer = new WhitespaceAnalyzer();
     QueryParser qp = new QueryParser(f1, analyzer);
     Query query = qp.parse(q);
-    CachingTokenFilter stream = new CachingTokenFilter(analyzer.tokenStream(f1,
-        new StringReader(content)));
-    Scorer scorer = new SpanScorer(query, f1, stream, false);
+
+    QueryScorer scorer = new QueryScorer(query, f1);
+    scorer.setExpandMultiTermQuery(false);
+
     Highlighter h = new Highlighter(this, scorer);
 
     h.getBestFragment(analyzer, f1, content);
@@ -241,14 +242,14 @@
 
     int maxNumFragmentsRequired = 2;
 
+    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
+    Highlighter highlighter = new Highlighter(this, scorer);
+    
     for (int i = 0; i < hits.length(); i++) {
       String text = hits.doc(i).get(FIELD_NAME);
-      CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME,
-          new StringReader(text)));
-      Highlighter highlighter = new Highlighter(this,
-          new SpanScorer(query, FIELD_NAME, tokenStream));
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
+
       highlighter.setTextFragmenter(new SimpleFragmenter(40));
-      tokenStream.reset();
 
       String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
           "...");
@@ -264,14 +265,13 @@
 
     int maxNumFragmentsRequired = 2;
 
+    QueryScorer scorer =  new QueryScorer(query, FIELD_NAME);
+    Highlighter highlighter = new Highlighter(this,scorer);
+    highlighter.setTextFragmenter(new SimpleFragmenter(40));
+    
     for (int i = 0; i < hits.length(); i++) {
       String text = hits.doc(i).get(FIELD_NAME);
-      CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME,
-          new StringReader(text)));
-      Highlighter highlighter = new Highlighter(this,
-          new SpanScorer(query, FIELD_NAME, tokenStream));
-      highlighter.setTextFragmenter(new SimpleFragmenter(40));
-      tokenStream.reset();
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
 
       String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
           "...");
@@ -291,10 +291,10 @@
       String text = hits.doc(i).get(FIELD_NAME);
       CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME,
           new StringReader(text)));
-      Highlighter highlighter = new Highlighter(this,
-          new SpanScorer(query, FIELD_NAME, tokenStream));
+      QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
+      Highlighter highlighter = new Highlighter(this, scorer);
+
       highlighter.setTextFragmenter(new SimpleFragmenter(40));
-      tokenStream.reset();
 
       String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
           "...");
@@ -310,14 +310,15 @@
 
     int maxNumFragmentsRequired = 2;
 
+    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
+    Highlighter highlighter = new Highlighter(this, scorer);
+    
     for (int i = 0; i < hits.length(); i++) {
       String text = hits.doc(i).get(FIELD_NAME);
       CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer
           .tokenStream(FIELD_NAME, new StringReader(text)));
-      SpanScorer spanscorer = new SpanScorer(query, FIELD_NAME, tokenStream);
-      Highlighter highlighter = new Highlighter(this, spanscorer);
-      highlighter.setTextFragmenter(new SimpleSpanFragmenter(spanscorer, 5));
-      tokenStream.reset();
+
+      highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 5));
 
       String result = highlighter.getBestFragments(tokenStream, text,
           maxNumFragmentsRequired, "...");
@@ -328,15 +329,16 @@
     doSearching("\"been shot\"");
 
     maxNumFragmentsRequired = 2;
+    
+    scorer = new QueryScorer(query, FIELD_NAME);
+    highlighter = new Highlighter(this, scorer);
 
     for (int i = 0; i < hits.length(); i++) {
       String text = hits.doc(i).get(FIELD_NAME);
       CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer
           .tokenStream(FIELD_NAME, new StringReader(text)));
-      SpanScorer spanscorer = new SpanScorer(query, FIELD_NAME, tokenStream);
-      Highlighter highlighter = new Highlighter(this, spanscorer);
-      highlighter.setTextFragmenter(new SimpleSpanFragmenter(spanscorer, 20));
-      tokenStream.reset();
+
+      highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 20));
 
       String result = highlighter.getBestFragments(tokenStream, text,
           maxNumFragmentsRequired, "...");
@@ -350,15 +352,16 @@
     doSearching("y \"x y z\"");
 
     int maxNumFragmentsRequired = 2;
-
+    
+    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
+    Highlighter highlighter = new Highlighter(this,scorer);
+    
     for (int i = 0; i < hits.length(); i++) {
       String text = hits.doc(i).get(FIELD_NAME);
       CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME,
           new StringReader(text)));
-      Highlighter highlighter = new Highlighter(this,
-          new SpanScorer(query, FIELD_NAME, tokenStream));
+
       highlighter.setTextFragmenter(new SimpleFragmenter(40));
-      tokenStream.reset();
 
       String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
           "...");
@@ -421,7 +424,7 @@
 
   public void testSimpleHighlighter() throws Exception {
     doSearching("Kennedy");
-    Highlighter highlighter = new Highlighter(new QueryScorer(query));
+    Highlighter highlighter = new Highlighter(new QueryTermScorer(query));
     highlighter.setTextFragmenter(new SimpleFragmenter(40));
     int maxNumFragmentsRequired = 2;
     for (int i = 0; i < hits.length(); i++) {
@@ -579,18 +582,15 @@
       String text = hits.doc(i).get(HighlighterTest.FIELD_NAME);
       int maxNumFragmentsRequired = 2;
       String fragmentSeparator = "...";
-      SpanScorer scorer = null;
+      QueryScorer scorer = null;
       TokenStream tokenStream = null;
 
       tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME,
           new StringReader(text)));
       
-      SpanScorer.setHighlightCnstScrRngQuery(true);
-      scorer = new SpanScorer(query, HighlighterTest.FIELD_NAME, (CachingTokenFilter) tokenStream);
-      
-      Highlighter highlighter = new Highlighter(this, scorer);
+      scorer = new QueryScorer(query, HighlighterTest.FIELD_NAME);
 
-      ((CachingTokenFilter) tokenStream).reset();
+      Highlighter highlighter = new Highlighter(this, scorer);
 
       highlighter.setTextFragmenter(new SimpleFragmenter(20));
 
@@ -619,18 +619,16 @@
       String text = hits.doc(i).get(HighlighterTest.FIELD_NAME);
       int maxNumFragmentsRequired = 2;
       String fragmentSeparator = "...";
-      SpanScorer scorer = null;
+      QueryScorer scorer = null;
       TokenStream tokenStream = null;
 
       tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME,
           new StringReader(text)));
       
-      scorer = new SpanScorer(query, HighlighterTest.FIELD_NAME, (CachingTokenFilter) tokenStream, true);
+      scorer = new QueryScorer(query, HighlighterTest.FIELD_NAME);
 
       Highlighter highlighter = new Highlighter(this, scorer);
 
-      ((CachingTokenFilter) tokenStream).reset();
-
       highlighter.setTextFragmenter(new SimpleFragmenter(20));
 
       String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
@@ -650,18 +648,16 @@
       String text = hits.doc(i).get(HighlighterTest.FIELD_NAME);
       int maxNumFragmentsRequired = 2;
       String fragmentSeparator = "...";
-      SpanScorer scorer = null;
+      QueryScorer scorer = null;
       TokenStream tokenStream = null;
 
       tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME,
           new StringReader(text)));
       
-      scorer = new SpanScorer(query, null, (CachingTokenFilter) tokenStream, true);
+      scorer = new QueryScorer(query, null);
 
       Highlighter highlighter = new Highlighter(this, scorer);
 
-      ((CachingTokenFilter) tokenStream).reset();
-
       highlighter.setTextFragmenter(new SimpleFragmenter(20));
 
       String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
@@ -681,18 +677,16 @@
       String text = hits.doc(i).get(HighlighterTest.FIELD_NAME);
       int maxNumFragmentsRequired = 2;
       String fragmentSeparator = "...";
-      SpanScorer scorer = null;
+      QueryScorer scorer = null;
       TokenStream tokenStream = null;
 
       tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME,
           new StringReader(text)));
       
-      scorer = new SpanScorer(query, "random_field", (CachingTokenFilter) tokenStream, HighlighterTest.FIELD_NAME, true);
+      scorer = new QueryScorer(query, "random_field", HighlighterTest.FIELD_NAME);
 
       Highlighter highlighter = new Highlighter(this, scorer);
 
-      ((CachingTokenFilter) tokenStream).reset();
-
       highlighter.setTextFragmenter(new SimpleFragmenter(20));
 
       String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
@@ -744,7 +738,7 @@
 
       public void run() throws Exception {
         TermQuery query = new TermQuery(new Term("data", "help"));
-        Highlighter hg = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
+        Highlighter hg = new Highlighter(new SimpleHTMLFormatter(), new QueryTermScorer(query));
         hg.setTextFragmenter(new NullFragmenter());
 
         String match = null;
@@ -900,7 +894,7 @@
 
         Highlighter highlighter = getHighlighter(wTerms, HighlighterTest.this);// new
         // Highlighter(new
-        // QueryScorer(wTerms));
+        // QueryTermScorer(wTerms));
         TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
         highlighter.setTextFragmenter(new SimpleFragmenter(2));
 
@@ -965,7 +959,7 @@
       public void run() throws Exception {
         numHighlights = 0;
         doSearching("Kennedy");
-        // new Highlighter(HighlighterTest.this, new QueryScorer(query));
+        // new Highlighter(HighlighterTest.this, new QueryTermScorer(query));
 
         for (int i = 0; i < hits.length(); i++) {
           String text = hits.doc(i).get(FIELD_NAME);
@@ -995,7 +989,7 @@
 
           Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
               HighlighterTest.this);// new Highlighter(this, new
-          // QueryScorer(query));
+          // QueryTermScorer(query));
           highlighter.setTextFragmenter(new SimpleFragmenter(20));
           String stringResults[] = highlighter.getBestFragments(tokenStream, text, 10);
 
@@ -1027,7 +1021,7 @@
         TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
         Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
             HighlighterTest.this);// new Highlighter(this, new
-        // QueryScorer(query));
+        // QueryTermScorer(query));
         highlighter.setMaxDocBytesToAnalyze(30);
 
         highlighter.getBestFragment(tokenStream, texts[0]);
@@ -1062,7 +1056,7 @@
         Highlighter hg = getHighlighter(query, "data", new StandardAnalyzer(stopWords).tokenStream(
             "data", new StringReader(sb.toString())), fm);// new Highlighter(fm,
         // new
-        // QueryScorer(query));
+        // QueryTermScorer(query));
         hg.setTextFragmenter(new NullFragmenter());
         hg.setMaxDocBytesToAnalyze(100);
         match = hg.getBestFragment(new StandardAnalyzer(stopWords), "data", sb.toString());
@@ -1114,7 +1108,6 @@
 
       public void run() throws Exception {
         numHighlights = 0;
-        SpanScorer.setHighlightCnstScrRngQuery(false);
         // test to show how rewritten query can still be used
         searcher = new IndexSearcher(ramDir);
         Analyzer analyzer = new StandardAnalyzer();
@@ -1136,12 +1129,14 @@
 
         for (int i = 0; i < hits.length(); i++) {
           String text = hits.doc(i).get(FIELD_NAME);
-          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
-          Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
-              HighlighterTest.this);
+          TokenStream tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME, new StringReader(text)));
+          Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream, HighlighterTest.this, false);
+
           highlighter.setTextFragmenter(new SimpleFragmenter(40));
+
           String highlightedText = highlighter.getBestFragments(tokenStream, text,
               maxNumFragmentsRequired, "...");
+
           System.out.println(highlightedText);
         }
         // We expect to have zero highlights if the query is multi-terms and is
@@ -1198,8 +1193,8 @@
         return 1;
       }
 
-      public void init(TokenStream tokenStream) {
-        
+      public TokenStream init(TokenStream tokenStream) {
+        return null;
       }
     });
     highlighter.setTextFragmenter(new SimpleFragmenter(2000));
@@ -1266,7 +1261,7 @@
 
     // create an instance of the highlighter with the tags used to surround
     // highlighted text
-    Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
+    Highlighter highlighter = new Highlighter(this, new QueryTermScorer(query));
 
     for (int i = 0; i < hits.length(); i++) {
       String text = hits.doc(i).get(FIELD_NAME);
@@ -1293,9 +1288,10 @@
         if (mode == this.SPAN) {
           TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(docMainText));
           CachingTokenFilter ctf = new CachingTokenFilter(tokenStream);
-          fieldSpecificScorer = new SpanScorer(query, FIELD_NAME, ctf);
+          fieldSpecificScorer = new QueryScorer(query, FIELD_NAME);
+
         } else if (mode == this.STANDARD) {
-          fieldSpecificScorer = new QueryScorer(query, "contents");
+          fieldSpecificScorer = new QueryTermScorer(query, "contents");
         }
         Highlighter fieldSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(),
             fieldSpecificScorer);
@@ -1308,9 +1304,10 @@
         if (mode == this.SPAN) {
           TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(docMainText));
           CachingTokenFilter ctf = new CachingTokenFilter(tokenStream);
-          fieldInSpecificScorer = new SpanScorer(query, null, ctf);
+          fieldInSpecificScorer = new QueryScorer(query, null);
+
         } else if (mode == this.STANDARD) {
-          fieldInSpecificScorer = new QueryScorer(query);
+          fieldInSpecificScorer = new QueryTermScorer(query);
         }
 
         Highlighter fieldInSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(),
@@ -1535,9 +1532,9 @@
     Query query = parser.parse( q );
     IndexSearcher searcher = new IndexSearcher( dir );
     // This scorer can return negative idf -> null fragment
-    Scorer scorer = new QueryScorer( query, searcher.getIndexReader(), "t_text1" );
+    Scorer scorer = new QueryTermScorer( query, searcher.getIndexReader(), "t_text1" );
     // This scorer doesn't use idf (patch version)
-    //Scorer scorer = new QueryScorer( query, "t_text1" );
+    //Scorer scorer = new QueryTermScorer( query, "t_text1" );
     Highlighter h = new Highlighter( scorer );
 
     TopDocs hits = searcher.search(query, null, 10);
@@ -1606,10 +1603,10 @@
       String text = hits.doc(i).get(FIELD_NAME);
       CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME,
           new StringReader(text)));
-      Highlighter highlighter = new Highlighter(this,
-          new SpanScorer(query, FIELD_NAME, tokenStream));
+      QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
+      Highlighter highlighter = new Highlighter(this, scorer);
+
       highlighter.setTextFragmenter(new SimpleFragmenter(40));
-      tokenStream.reset();
 
       String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
           "...");
@@ -1763,34 +1760,34 @@
     static final int SPAN = 1;
     int mode = STANDARD;
     Fragmenter frag = new SimpleFragmenter(20);
-
-    public Highlighter getHighlighter(Query query, String fieldName, TokenStream stream,
-        Formatter formatter) {
+    
+    public Highlighter getHighlighter(Query query, String fieldName, TokenStream stream, Formatter formatter) {
+      return getHighlighter(query, fieldName, stream, formatter, true);
+    }
+    
+    public Highlighter getHighlighter(Query query, String fieldName, TokenStream stream, Formatter formatter, boolean expanMultiTerm) {
+      Scorer scorer = null;
       if (mode == STANDARD) {
-        return new Highlighter(formatter, new QueryScorer(query));
+        scorer = new QueryTermScorer(query);
       } else if (mode == SPAN) {
-        CachingTokenFilter tokenStream = new CachingTokenFilter(stream);
-        Highlighter highlighter;
-        try {
-          highlighter = new Highlighter(formatter, new SpanScorer(query, fieldName, tokenStream));
-          tokenStream.reset();
-        } catch (IOException e) {
-          throw new RuntimeException(e);
+        scorer = new QueryScorer(query, fieldName);
+        if(!expanMultiTerm) {
+          ((QueryScorer)scorer).setExpandMultiTermQuery(false);
         }
-
-        return highlighter;
       } else {
         throw new RuntimeException("Unknown highlight mode");
       }
+      
+      return new Highlighter(formatter, scorer);
     }
 
     Highlighter getHighlighter(WeightedTerm[] weightedTerms, Formatter formatter) {
       if (mode == STANDARD) {
-        return new Highlighter(formatter, new QueryScorer(weightedTerms));
+        return new Highlighter(formatter, new QueryTermScorer(weightedTerms));
       } else if (mode == SPAN) {
         Highlighter highlighter;
 
-        highlighter = new Highlighter(formatter, new SpanScorer((WeightedSpanTerm[]) weightedTerms));
+        highlighter = new Highlighter(formatter, new QueryScorer((WeightedSpanTerm[]) weightedTerms));
 
         return highlighter;
       } else {
@@ -1815,16 +1812,14 @@
         if (mode == SPAN) {
           tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME,
               new StringReader(text)));
-          scorer = new SpanScorer(query, HighlighterTest.FIELD_NAME,
-              (CachingTokenFilter) tokenStream, expandMT);
+          scorer = new QueryScorer(query, HighlighterTest.FIELD_NAME);
+
         } else if (mode == STANDARD) {
-          scorer = new QueryScorer(query);
+          scorer = new QueryTermScorer(query);
           tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
         }
         Highlighter highlighter = new Highlighter(formatter, scorer);
-        if (mode == SPAN) {
-          ((CachingTokenFilter) tokenStream).reset();
-        }
+
         highlighter.setTextFragmenter(frag);
 
         String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,



Mime
View raw message