lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mharw...@apache.org
Subject svn commit: r389888 - in /lucene/java/trunk/contrib/highlighter/src: java/org/apache/lucene/search/highlight/Highlighter.java java/org/apache/lucene/search/highlight/QueryTermExtractor.java test/org/apache/lucene/search/highlight/HighlighterTest.java
Date Wed, 29 Mar 2006 21:01:41 GMT
Author: mharwood
Date: Wed Mar 29 13:01:40 2006
New Revision: 389888

URL: http://svn.apache.org/viewcvs?rev=389888&view=rev
Log:
Simplified QueryTermExtractor.java to make use of Query.extractTerms method (especially now
that all the SpanQuery classes implement this correctly).
Added JUnit tests to demonstrate new support for other Query types (e.g. FilteredQuery), now
that we use the standard extractTerms feature of Query objects.
Also deprecated the Highlighter getBestFragments method that hard-coded the choice of field name, and
introduced a new variant that takes an additional fieldName argument.

Modified:
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java
    lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java

Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java?rev=389888&r1=389887&r2=389888&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java	(original)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java	Wed Mar 29 13:01:40 2006
@@ -113,7 +113,8 @@
 	 * into chunks  
 	 * @param text        	text to highlight terms in
 	 * @param maxNumFragments  the maximum number of fragments.
-	 *
+	 * @deprecated This method incorrectly hardcodes the choice of fieldname. Use the
+	 * method of the same name that takes a fieldname.
 	 * @return highlighted text fragments (between 0 and maxNumFragments number of fragments)
 	 */
 	public final String[] getBestFragments(
@@ -123,6 +124,29 @@
 		throws IOException
 	{
 		TokenStream tokenStream = analyzer.tokenStream("field", new StringReader(text));
+		return getBestFragments(tokenStream, text, maxNumFragments);
+	}
+	/**
+	 * Highlights chosen terms in a text, extracting the most relevant sections.
+	 * This is a convenience method that calls
+	 * {@link #getBestFragments(TokenStream, String, int)}
+	 *
+	 * @param analyzer   the analyzer that will be used to split <code>text</code>
+	 * into chunks  
+	 * @param fieldName     the name of the field being highlighted (used by analyzer)
+	 * @param text        	text to highlight terms in
+	 * @param maxNumFragments  the maximum number of fragments.
+	 *
+	 * @return highlighted text fragments (between 0 and maxNumFragments number of fragments)
+	 */
+	public final String[] getBestFragments(
+		Analyzer analyzer,	
+		String fieldName,
+		String text,
+		int maxNumFragments)
+		throws IOException
+	{
+		TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
 		return getBestFragments(tokenStream, text, maxNumFragments);
 	}
 	

Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java?rev=389888&r1=389887&r2=389888&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java	(original)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java	Wed Mar 29 13:01:40 2006
@@ -16,18 +16,12 @@
  */
 
 import java.io.IOException;
-import java.util.Collection;
 import java.util.HashSet;
 import java.util.Iterator;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.spans.SpanNearQuery;
 
 /**
  * Utility class used to extract the terms used in a query, plus any weights.
@@ -114,75 +108,22 @@
 	//fieldname MUST be interned prior to this call
 	private static final void getTerms(Query query, HashSet terms,boolean prohibited, String fieldName) 
 	{
-		if (query instanceof BooleanQuery)
-			getTermsFromBooleanQuery((BooleanQuery) query, terms, prohibited, fieldName);
-		else
-			if (query instanceof PhraseQuery)
-				getTermsFromPhraseQuery((PhraseQuery) query, terms, fieldName);
-			else
-				if (query instanceof TermQuery)
-					getTermsFromTermQuery((TermQuery) query, terms, fieldName);
-				else
-		        if(query instanceof SpanNearQuery)
-		            getTermsFromSpanNearQuery((SpanNearQuery) query, terms, fieldName);
+       	try
+       	{
+       		HashSet nonWeightedTerms=new HashSet();
+       		query.extractTerms(nonWeightedTerms);
+       		for (Iterator iter = nonWeightedTerms.iterator(); iter.hasNext();)
+			{
+				Term term = (Term) iter.next();
+			    if((fieldName==null)||(term.field()==fieldName))
+				{
+					terms.add(new WeightedTerm(query.getBoost(),term.text()));
+				}
+			}
+	      }
+	      catch(UnsupportedOperationException ignore)
+	      {
+	    	  //this is non-fatal for our purposes
+       	  }		        			        	
 	}
-
-	private static final void getTermsFromBooleanQuery(BooleanQuery query, HashSet terms, boolean prohibited, String fieldName)
-	{
-		BooleanClause[] queryClauses = query.getClauses();
-		int i;
-
-		for (i = 0; i < queryClauses.length; i++)
-		{
-			//Pre Lucene 2.0 code
-//			if (prohibited || !queryClauses[i].prohibited)
-//				getTerms(queryClauses[i].query, terms, prohibited, fieldName);
-			// Lucene 2.0 ready code
-			if (prohibited || queryClauses[i].getOccur()!=BooleanClause.Occur.MUST_NOT)
-				getTerms(queryClauses[i].getQuery(), terms, prohibited, fieldName);
-		}
-	}
-
-	private static final void getTermsFromPhraseQuery(PhraseQuery query, HashSet terms, String fieldName)
-	{
-		Term[] queryTerms = query.getTerms();
-		int i;
-
-		for (i = 0; i < queryTerms.length; i++)
-		{
-		    if((fieldName==null)||(queryTerms[i].field()==fieldName))
-		    {
-		        terms.add(new WeightedTerm(query.getBoost(),queryTerms[i].text()));
-		    }
-		}
-	}
-
-	private static final void getTermsFromTermQuery(TermQuery query, HashSet terms, String fieldName)
-	{
-	    if((fieldName==null)||(query.getTerm().field()==fieldName))
-	    {
-	        terms.add(new WeightedTerm(query.getBoost(),query.getTerm().text()));
-	    }
-	}
-
-    private static final void getTermsFromSpanNearQuery(SpanNearQuery query, HashSet terms, String fieldName){
-
-        Collection queryTerms = query.getTerms();
-
-        for(Iterator iterator = queryTerms.iterator(); iterator.hasNext();){
-
-            // break it out for debugging.
-
-            Term term = (Term) iterator.next();
-
-            String text = term.text();
-
-    	    if((fieldName==null)||(term.field()==fieldName))
-    	    {
-    	        terms.add(new WeightedTerm(query.getBoost(), text));
-    	    }
-        }
-
-    }
-
 }

Modified: lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=389888&r1=389887&r2=389888&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java	(original)
+++ lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java	Wed Mar 29 13:01:40 2006
@@ -38,13 +38,20 @@
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.FilteredQuery;
 import org.apache.lucene.search.Hits;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MultiSearcher;
+import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.RangeFilter;
 import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.store.RAMDirectory;
 import org.w3c.dom.Element;
 import org.w3c.dom.NodeList;
@@ -140,6 +147,47 @@
 		//Currently highlights "John" and "Kennedy" separately
 		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 2);
 	}
+	public void testGetBestFragmentsSpan() throws Exception
+	{
+		SpanQuery clauses[]={
+			new SpanTermQuery(new Term("contents","john")),
+			new SpanTermQuery(new Term("contents","kennedy")),
+			}; 
+		
+		SpanNearQuery snq=new SpanNearQuery(clauses,1,true);
+		doSearching(snq);
+		doStandardHighlights();
+		//Currently highlights "John" and "Kennedy" separately
+		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 2);
+	}
+	public void testGetBestFragmentsFilteredQuery() throws Exception
+	{
+		RangeFilter rf=new RangeFilter("contents","john","john",true,true);
+		SpanQuery clauses[]={
+				new SpanTermQuery(new Term("contents","john")),
+				new SpanTermQuery(new Term("contents","kennedy")),
+				}; 
+		SpanNearQuery snq=new SpanNearQuery(clauses,1,true);
+		FilteredQuery fq=new FilteredQuery(snq,rf);
+		
+		doSearching(fq);
+		doStandardHighlights();
+		//Currently highlights "John" and "Kennedy" separately
+		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 2);
+	}
+	public void testGetBestFragmentsFilteredPhraseQuery() throws Exception
+	{
+		RangeFilter rf=new RangeFilter("contents","john","john",true,true);
+		PhraseQuery pq=new PhraseQuery();
+		pq.add(new Term("contents","john"));
+		pq.add(new  Term("contents","kennedy"));
+		FilteredQuery fq=new FilteredQuery(pq,rf);
+		
+		doSearching(fq);
+		doStandardHighlights();
+		//Currently highlights "John" and "Kennedy" separately
+		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 2);
+	}
 
 	public void testGetBestFragmentsMultiTerm() throws Exception
 	{
@@ -181,7 +229,7 @@
 		for (int i = 0; i < hits.length(); i++)
 		{
     		String text = hits.doc(i).get(FIELD_NAME);
-    		highlighter.getBestFragments(analyzer, text, 10);
+    		highlighter.getBestFragments(analyzer,FIELD_NAME, text, 10);
 		}
 		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
 
@@ -536,17 +584,21 @@
 		numHighlights++; //update stats used in assertions
 		return "<b>" + originalText + "</b>";
 	}
-
+	
 	public void doSearching(String queryString) throws Exception
 	{
-		searcher = new IndexSearcher(ramDir);
 		QueryParser parser=new QueryParser(FIELD_NAME, new StandardAnalyzer());
 		query = parser.parse(queryString);
+		doSearching(query);
+	}
+	public void doSearching(Query unReWrittenQuery) throws Exception
+	{
+		searcher = new IndexSearcher(ramDir);
 		//for any multi-term queries to work (prefix, wildcard, range,fuzzy etc) you must use a rewritten query!
-		query=query.rewrite(reader);
+		query=unReWrittenQuery.rewrite(reader);
 		System.out.println("Searching for: " + query.toString(FIELD_NAME));
 		hits = searcher.search(query);
-	}
+	}	
 
 	void doStandardHighlights() throws Exception
 	{



Mime
View raw message