lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mharw...@apache.org
Subject svn commit: r652164 [2/2] - in /lucene/java/trunk/contrib/highlighter: ./ src/java/org/apache/lucene/search/highlight/ src/test/org/apache/lucene/search/highlight/
Date Tue, 29 Apr 2008 22:07:18 GMT
Modified: lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=652164&r1=652163&r2=652164&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java Tue Apr 29 15:07:18 2008
@@ -21,32 +21,46 @@
 import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.StringTokenizer;
 
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 
 import junit.framework.TestCase;
 
-import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CachingTokenFilter;
+import org.apache.lucene.analysis.LowerCaseTokenizer;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.FilteredQuery;
 import org.apache.lucene.search.Hits;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MultiPhraseQuery;
 import org.apache.lucene.search.MultiSearcher;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.RangeFilter;
 import org.apache.lucene.search.Searcher;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner;
 import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanNotQuery;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.store.RAMDirectory;
@@ -55,864 +69,1389 @@
 
 /**
  * JUnit Test for Highlighter class.
+ * 
  * @author mark@searcharea.co.uk
  */
-public class HighlighterTest extends TestCase implements Formatter
-{
-	private IndexReader reader;
-	private static final String FIELD_NAME = "contents";
-	private Query query;
-	RAMDirectory ramDir;
-	public Searcher searcher = null;
-	public Hits hits = null;
-	int numHighlights = 0;
-	Analyzer analyzer=new StandardAnalyzer();
-
-	String texts[] =
-		{
-			"Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot",
-			"This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy",
-			"JFK has been shot",
-			"John Kennedy has been shot",
-			"This text has a typo in referring to Keneddy" };
-
-	/**
-	 * Constructor for HighlightExtractorTest.
-	 * @param arg0
-	 */
-	public HighlighterTest(String arg0)
-	{
-		super(arg0);
-	}
-
-	public void testSimpleHighlighter() throws Exception
-	{
-		doSearching("Kennedy");
-		Highlighter highlighter =	new Highlighter(new QueryScorer(query));
-		highlighter.setTextFragmenter(new SimpleFragmenter(40));
-		int maxNumFragmentsRequired = 2;
-		for (int i = 0; i < hits.length(); i++)
-		{
-			String text = hits.doc(i).get(FIELD_NAME);
-			TokenStream tokenStream=analyzer.tokenStream(FIELD_NAME,new StringReader(text));
-
-			String result =
-				highlighter.getBestFragments(tokenStream,text,maxNumFragmentsRequired, "...");
-			System.out.println("\t" + result);
-		}
-		//Not sure we can assert anything here - just running to check we dont throw any exceptions
-	}
-
-
-
-	public void testGetBestFragmentsSimpleQuery() throws Exception
-	{
-		doSearching("Kennedy");
-		doStandardHighlights();
-		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
-	}
-	public void testGetFuzzyFragments() throws Exception
-	{
-		doSearching("Kinnedy~");
-		doStandardHighlights();
-		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 5);
-	}
-
-	public void testGetWildCardFragments() throws Exception
-	{
-		doSearching("K?nnedy");
-		doStandardHighlights();
-		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
-	}
-	public void testGetMidWildCardFragments() throws Exception
-	{
-		doSearching("K*dy");
-		doStandardHighlights();
-		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 5);
-	}
-	public void testGetRangeFragments() throws Exception
-	{
-		String queryString=FIELD_NAME + ":[kannedy TO kznnedy]"; 
-		
-		//Need to explicitly set the QueryParser property to use RangeQuery rather than RangeFilters
-		QueryParser parser=new QueryParser(FIELD_NAME, new StandardAnalyzer());
-		parser.setUseOldRangeQuery(true);
-		query = parser.parse(queryString);
-		doSearching(query);
-		
-		doStandardHighlights();
-		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 5);
-	}
-
-	public void testGetBestFragmentsPhrase() throws Exception
-	{
-		doSearching("\"John Kennedy\"");
-		doStandardHighlights();
-		//Currently highlights "John" and "Kennedy" separately
-		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 2);
-	}
-	public void testGetBestFragmentsSpan() throws Exception
-	{
-		SpanQuery clauses[]={
-			new SpanTermQuery(new Term("contents","john")),
-			new SpanTermQuery(new Term("contents","kennedy")),
-			}; 
-		
-		SpanNearQuery snq=new SpanNearQuery(clauses,1,true);
-		doSearching(snq);
-		doStandardHighlights();
-		//Currently highlights "John" and "Kennedy" separately
-		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 2);
-	}
-
-	public void testOffByOne() throws IOException 
-	{
-	    TermQuery query= new TermQuery( new Term( "data", "help" ));
-	    Highlighter hg = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer( query ));
-	    hg.setTextFragmenter( new NullFragmenter() );
-
-	    String match = null;
-	    match = hg.getBestFragment( new StandardAnalyzer(), "data", "help me [54-65]");
-	    assertEquals("<B>help</B> me [54-65]", match);
-	} 	
-	public void testGetBestFragmentsFilteredQuery() throws Exception
-	{
-		RangeFilter rf=new RangeFilter("contents","john","john",true,true);
-		SpanQuery clauses[]={
-				new SpanTermQuery(new Term("contents","john")),
-				new SpanTermQuery(new Term("contents","kennedy")),
-				}; 
-		SpanNearQuery snq=new SpanNearQuery(clauses,1,true);
-		FilteredQuery fq=new FilteredQuery(snq,rf);
-		
-		doSearching(fq);
-		doStandardHighlights();
-		//Currently highlights "John" and "Kennedy" separately
-		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 2);
-	}
-	public void testGetBestFragmentsFilteredPhraseQuery() throws Exception
-	{
-		RangeFilter rf=new RangeFilter("contents","john","john",true,true);
-		PhraseQuery pq=new PhraseQuery();
-		pq.add(new Term("contents","john"));
-		pq.add(new  Term("contents","kennedy"));
-		FilteredQuery fq=new FilteredQuery(pq,rf);
-		
-		doSearching(fq);
-		doStandardHighlights();
-		//Currently highlights "John" and "Kennedy" separately
-		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 2);
-	}
-
-	public void testGetBestFragmentsMultiTerm() throws Exception
-	{
-		doSearching("John Kenn*");
-		doStandardHighlights();
-		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 5);
-	}
-	public void testGetBestFragmentsWithOr() throws Exception
-	{
-		doSearching("JFK OR Kennedy");
-		doStandardHighlights();
-		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 5);
-	}
-
-	public void testGetBestSingleFragment() throws Exception
-	{
-		doSearching("Kennedy");
-		Highlighter highlighter =new Highlighter(this,new QueryScorer(query));
-		highlighter.setTextFragmenter(new SimpleFragmenter(40));
-
-		for (int i = 0; i < hits.length(); i++)
-		{
-			String text = hits.doc(i).get(FIELD_NAME);
-			TokenStream tokenStream=analyzer.tokenStream(FIELD_NAME,new StringReader(text));
-			String result = highlighter.getBestFragment(tokenStream,text);
-			System.out.println("\t" + result);
-		}
-		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
-
-		numHighlights = 0;
-		for (int i = 0; i < hits.length(); i++)
-		{
-    		String text = hits.doc(i).get(FIELD_NAME);
-    		highlighter.getBestFragment(analyzer, FIELD_NAME,text);
-		}
-		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
-
-		numHighlights = 0;
-		for (int i = 0; i < hits.length(); i++)
-		{
-    		String text = hits.doc(i).get(FIELD_NAME);
-    		highlighter.getBestFragments(analyzer,FIELD_NAME, text, 10);
-		}
-		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
-
-	}
-
-	public void testGetBestSingleFragmentWithWeights() throws Exception
-	{
-		WeightedTerm[]wTerms=new WeightedTerm[2];
-		wTerms[0]=new WeightedTerm(10f,"hello");
-		wTerms[1]=new WeightedTerm(1f,"kennedy");
-		Highlighter highlighter =new Highlighter(new QueryScorer(wTerms));
-		TokenStream tokenStream=analyzer.tokenStream(FIELD_NAME,new StringReader(texts[0]));
-		highlighter.setTextFragmenter(new SimpleFragmenter(2));
-
-		String result = highlighter.getBestFragment(tokenStream,texts[0]).trim();
-		assertTrue("Failed to find best section using weighted terms. Found: ["+result+"]"
-			, "<B>Hello</B>".equals(result));
-
-		//readjust weights
-		wTerms[1].setWeight(50f);
-		tokenStream=analyzer.tokenStream(FIELD_NAME,new StringReader(texts[0]));
-		highlighter =new Highlighter(new QueryScorer(wTerms));
-		highlighter.setTextFragmenter(new SimpleFragmenter(2));
-
-		result = highlighter.getBestFragment(tokenStream,texts[0]).trim();
-		assertTrue("Failed to find best section using weighted terms. Found: "+result
-			, "<B>kennedy</B>".equals(result));
-	}
-	
-	
-	// tests a "complex" analyzer that produces multiple 
-	// overlapping tokens 
-	public void testOverlapAnalyzer() throws Exception
-	{
-		HashMap synonyms = new HashMap();
-		synonyms.put("football", "soccer,footie");
-		Analyzer analyzer = new SynonymAnalyzer(synonyms);
-		String srchkey = "football";
-
-		String s = "football-soccer in the euro 2004 footie competition";
-		QueryParser parser=new QueryParser("bookid",analyzer);
-		Query query = parser.parse(srchkey);
-
-		Highlighter highlighter = new Highlighter(new QueryScorer(query));
-		TokenStream tokenStream =
-			analyzer.tokenStream(null, new StringReader(s));
-		// Get 3 best fragments and seperate with a "..."
-		String result = highlighter.getBestFragments(tokenStream, s, 3, "...");
-		String expectedResult="<B>football</B>-<B>soccer</B> in the euro 2004 <B>footie</B> competition";
-		assertTrue("overlapping analyzer should handle highlights OK",expectedResult.equals(result));
-	}
-
-
-	public void testGetSimpleHighlight() throws Exception
-	{
-		doSearching("Kennedy");
-		Highlighter highlighter =
-			new Highlighter(this,new QueryScorer(query));
-
-		for (int i = 0; i < hits.length(); i++)
-		{
-			String text = hits.doc(i).get(FIELD_NAME);
-			TokenStream tokenStream=analyzer.tokenStream(FIELD_NAME,new StringReader(text));
-
-			String result = highlighter.getBestFragment(tokenStream,text);
-			System.out.println("\t" + result);
-		}
-		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
-	}
-
-
-	public void testGetTextFragments() throws Exception
-	{
-		doSearching("Kennedy");
-		Highlighter highlighter =
-			new Highlighter(this,new QueryScorer(query));
-		highlighter.setTextFragmenter(new SimpleFragmenter(20));
-
-		for (int i = 0; i < hits.length(); i++)
-		{
-			String text = hits.doc(i).get(FIELD_NAME);
-			TokenStream tokenStream=analyzer.tokenStream(FIELD_NAME,new StringReader(text));
-
-			String stringResults[] = highlighter.getBestFragments(tokenStream,text,10);
-
-			tokenStream=analyzer.tokenStream(FIELD_NAME,new StringReader(text));
-			TextFragment fragmentResults[] = highlighter.getBestTextFragments(tokenStream,text,true,10);
-
-			assertTrue("Failed to find correct number of text Fragments: " + 
-				fragmentResults.length + " vs "+ stringResults.length, fragmentResults.length==stringResults.length);
-			for (int j = 0; j < stringResults.length; j++) 
-			{
-				System.out.println(fragmentResults[j]);
-				assertTrue("Failed to find same text Fragments: " + 
-					fragmentResults[j] + " found", fragmentResults[j].toString().equals(stringResults[j]));
-				
-			}
-			
-		}
-	}
-
-	public void testMaxSizeHighlight() throws Exception
-	{
-		doSearching("meat");
-		Highlighter highlighter =
-			new Highlighter(this,new QueryScorer(query));
-		highlighter.setMaxDocBytesToAnalyze(30);
-		TokenStream tokenStream=analyzer.tokenStream(FIELD_NAME,new StringReader(texts[0]));
-		highlighter.getBestFragment(tokenStream,texts[0]);
-		assertTrue("Setting MaxDocBytesToAnalyze should have prevented " +
-			"us from finding matches for this record: " + numHighlights +
-			 " found", numHighlights == 0);
-	}
-	public void testMaxSizeHighlightTruncates() throws IOException 
-	{
-	    String goodWord="goodtoken";
-	    String stopWords[]={"stoppedtoken"};
-	    
-	    TermQuery query= new TermQuery( new Term( "data", goodWord ));
-	    SimpleHTMLFormatter fm=new SimpleHTMLFormatter();
-	    Highlighter hg = new Highlighter(fm, new QueryScorer( query ));
-	    hg.setTextFragmenter( new NullFragmenter() );
-
-	    String match = null;
-	    StringBuffer sb=new StringBuffer();
-	    sb.append(goodWord);
-	    for(int i=0;i<10000;i++)
-	    {
-	    	sb.append(" ");
-	    	sb.append(stopWords[0]);
-	    }
-	    	    	
-	    hg.setMaxDocBytesToAnalyze(100);
-	    match = hg.getBestFragment( new StandardAnalyzer(stopWords), "data", sb.toString());
-	    assertTrue("Matched text should be no more than 100 chars in length ", 
-	    		match.length()<hg.getMaxDocBytesToAnalyze());
-	    
-	    //add another tokenized word to the overrall length - but set way beyond 
-	    //the length of text under consideration (after a large slug of stop words + whitespace)
-	    sb.append(" ");
-	    sb.append(goodWord);
-	    match = hg.getBestFragment( new StandardAnalyzer(stopWords), "data", sb.toString());
-	    assertTrue("Matched text should be no more than 100 chars in length ", 
-	    		match.length()<hg.getMaxDocBytesToAnalyze());
-	    
-	    
-	} 	
-
-
-
-	public void testUnRewrittenQuery() throws IOException, ParseException
-	{
-		//test to show how rewritten query can still be used
-		searcher = new IndexSearcher(ramDir);
-		Analyzer analyzer=new StandardAnalyzer();
-
-		QueryParser parser=new QueryParser(FIELD_NAME,analyzer);	
-		Query query = parser.parse("JF? or Kenned*");
-		System.out.println("Searching with primitive query");
-		//forget to set this and...
-		//query=query.rewrite(reader);
-		Hits hits = searcher.search(query);
-
-		//create an instance of the highlighter with the tags used to surround highlighted text
-//		QueryHighlightExtractor highlighter = new QueryHighlightExtractor(this, query, new StandardAnalyzer());
-		Highlighter highlighter =
-			new Highlighter(this,new QueryScorer(query));
-
-		highlighter.setTextFragmenter(new SimpleFragmenter(40));
-
-		int maxNumFragmentsRequired = 3;
-
-		for (int i = 0; i < hits.length(); i++)
-		{
-			String text = hits.doc(i).get(FIELD_NAME);
-			TokenStream tokenStream=analyzer.tokenStream(FIELD_NAME,new StringReader(text));
-
-			String highlightedText = highlighter.getBestFragments(tokenStream,text,maxNumFragmentsRequired,"...");
-			System.out.println(highlightedText);
-		}
-		//We expect to have zero highlights if the query is multi-terms and is not rewritten!
-		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 0);
-	}
-
-	public void testNoFragments() throws Exception
-	{
-		doSearching("AnInvalidQueryWhichShouldYieldNoResults");
-		Highlighter highlighter =
-			new Highlighter(this,new QueryScorer(query));
-
-		for (int i = 0; i < texts.length; i++)
-		{
-			String text = texts[i];
-			TokenStream tokenStream=analyzer.tokenStream(FIELD_NAME,new StringReader(text));
-
-			String result = highlighter.getBestFragment(tokenStream,text);
-			assertNull("The highlight result should be null for text with no query terms", result);
-		}
-	}
-	
-	/**
-	 * Demonstrates creation of an XHTML compliant doc using new encoding facilities.
-	 * @throws Exception
-	 */
-	public void testEncoding() throws Exception
-    {
-        String rawDocContent = "\"Smith & sons' prices < 3 and >4\" claims article";
-        //run the highlighter on the raw content (scorer does not score any tokens for 
-        // highlighting but scores a single fragment for selection
-        Highlighter highlighter = new Highlighter(this,
-                new SimpleHTMLEncoder(), new Scorer()
-                {
-                    public void startFragment(TextFragment newFragment)
-                    {
-                    }
-                    public float getTokenScore(Token token)
-                    {
-                        return 0;
-                    }
-                    public float getFragmentScore()
-                    {
-                        return 1;
-                    }
-                });
-        highlighter.setTextFragmenter(new SimpleFragmenter(2000));
-        TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME,
-                new StringReader(rawDocContent));
-
-        String encodedSnippet = highlighter.getBestFragments(tokenStream, rawDocContent,1,"");
-        //An ugly bit of XML creation:
-        String xhtml="<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"+
-            		"<!DOCTYPE html\n"+
-            		"PUBLIC \"//W3C//DTD XHTML 1.0 Transitional//EN\"\n"+
-            		"\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"+
-            		"<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\n"+
-            		"<head>\n"+
-            		"<title>My Test HTML Document</title>\n"+
-            		"</head>\n"+
-            		"<body>\n"+
-            		"<h2>"+encodedSnippet+"</h2>\n"+
-            		"</body>\n"+
-            		"</html>";
-        //now an ugly built of XML parsing to test the snippet is encoded OK 
-  		DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
-  		DocumentBuilder db = dbf.newDocumentBuilder();
-  		org.w3c.dom.Document doc = db.parse(new ByteArrayInputStream(xhtml.getBytes()));
-  		Element root=doc.getDocumentElement();  		
-  		NodeList nodes=root.getElementsByTagName("body");
-  		Element body=(Element) nodes.item(0);
-  		nodes=body.getElementsByTagName("h2");
-        Element h2=(Element) nodes.item(0); 
-        String decodedSnippet=h2.getFirstChild().getNodeValue();
-        assertEquals("XHTML Encoding should have worked:", rawDocContent,decodedSnippet);
-    }
-
-	public void testMultiSearcher() throws Exception
-	{
-		//setup index 1
-		RAMDirectory ramDir1 = new RAMDirectory();
-		IndexWriter writer1 = new IndexWriter(ramDir1, new StandardAnalyzer(), true);
-		Document d = new Document();
-		Field f = new Field(FIELD_NAME, "multiOne", Field.Store.YES, Field.Index.TOKENIZED);
-		d.add(f);
-		writer1.addDocument(d);
-		writer1.optimize();
-		writer1.close();
-		IndexReader reader1 = IndexReader.open(ramDir1);
-
-		//setup index 2
-		RAMDirectory ramDir2 = new RAMDirectory();
-		IndexWriter writer2 = new IndexWriter(ramDir2, new StandardAnalyzer(), true);
-		d = new Document();
-		f = new Field(FIELD_NAME, "multiTwo", Field.Store.YES, Field.Index.TOKENIZED);
-		d.add(f);
-		writer2.addDocument(d);
-		writer2.optimize();
-		writer2.close();
-		IndexReader reader2 = IndexReader.open(ramDir2);
-
-
-
-		IndexSearcher searchers[]=new IndexSearcher[2];
-		searchers[0] = new IndexSearcher(ramDir1);
-		searchers[1] = new IndexSearcher(ramDir2);
-		MultiSearcher multiSearcher=new MultiSearcher(searchers);
-		QueryParser parser=new QueryParser(FIELD_NAME, new StandardAnalyzer());
-		query = parser.parse("multi*");
-		System.out.println("Searching for: " + query.toString(FIELD_NAME));
-		//at this point the multisearcher calls combine(query[])
-		hits = multiSearcher.search(query);
-
-		//query = QueryParser.parse("multi*", FIELD_NAME, new StandardAnalyzer());
-		Query expandedQueries[]=new Query[2];
-		expandedQueries[0]=query.rewrite(reader1);
-		expandedQueries[1]=query.rewrite(reader2);
-		query=query.combine(expandedQueries);
-
-
-		//create an instance of the highlighter with the tags used to surround highlighted text
-		Highlighter highlighter =
-			new Highlighter(this,new QueryScorer(query));
-
-		for (int i = 0; i < hits.length(); i++)
-		{
-			String text = hits.doc(i).get(FIELD_NAME);
-			TokenStream tokenStream=analyzer.tokenStream(FIELD_NAME,new StringReader(text));
-			String highlightedText = highlighter.getBestFragment(tokenStream,text);
-			System.out.println(highlightedText);
-		}
-		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 2);
-
-	}
-	
-	public void testFieldSpecificHighlighting() throws IOException, ParseException
-	{
-		String docMainText="fred is one of the people";
-		QueryParser parser=new QueryParser(FIELD_NAME,analyzer);
-		Query query=parser.parse("fred category:people");
-		
-		//highlighting respects fieldnames used in query
-		QueryScorer fieldSpecificScorer=new QueryScorer(query, "contents");
-		Highlighter fieldSpecificHighlighter =
-			new Highlighter(new SimpleHTMLFormatter(),fieldSpecificScorer);
-		fieldSpecificHighlighter.setTextFragmenter(new NullFragmenter());
-		String result=fieldSpecificHighlighter.getBestFragment(analyzer,FIELD_NAME,docMainText);
-		assertEquals("Should match",result,"<B>fred</B> is one of the people");
-		
-		//highlighting does not respect fieldnames used in query
-		QueryScorer fieldInSpecificScorer=new QueryScorer(query);
-		Highlighter fieldInSpecificHighlighter =
-			new Highlighter(new SimpleHTMLFormatter(),fieldInSpecificScorer);
-		fieldInSpecificHighlighter.setTextFragmenter(new NullFragmenter());
-		result=fieldInSpecificHighlighter.getBestFragment(analyzer,FIELD_NAME,docMainText);
-		assertEquals("Should match",result,"<B>fred</B> is one of the <B>people</B>");
-		
-		
-		reader.close();
-		
-	}
+public class HighlighterTest extends TestCase implements Formatter {
+  private IndexReader reader;
+  static final String FIELD_NAME = "contents";
+  private Query query;
+  RAMDirectory ramDir;
+  public Searcher searcher = null;
+  public Hits hits = null;
+  int numHighlights = 0;
+  Analyzer analyzer = new StandardAnalyzer();
+
+  String[] texts = {
+      "Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot",
+      "This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy",
+      "JFK has been shot", "John Kennedy has been shot",
+      "This text has a typo in referring to Keneddy",
+      "wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", "y z x y z a b" };
+
+  /**
+   * Constructor for HighlightExtractorTest.
+   * 
+   * @param arg0
+   */
+  public HighlighterTest(String arg0) {
+    super(arg0);
+  }
+
+  public void testHighlightingWithDefaultField() throws Exception {
+
+    String s1 = "I call our world Flatland, not because we call it so,";
+
+    QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
+
+    // Verify that a query against the default field results in text being
+    // highlighted
+    // regardless of the field name.
+    Query q = parser.parse("\"world Flatland\"~3");
+    String expected = "I call our <B>world</B> <B>Flatland</B>, not because we call it so,";
+    String observed = highlightField(q, "SOME_FIELD_NAME", s1);
+    System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
+    assertEquals("Query in the default field results in text for *ANY* field being highlighted",
+        expected, observed);
+
+    // Verify that a query against a named field does not result in any
+    // highlighting
+    // when the query field name differs from the name of the field being
+    // highlighted,
+    // which in this example happens to be the default field name.
+    q = parser.parse("text:\"world Flatland\"~3");
+    expected = s1;
+    observed = highlightField(q, FIELD_NAME, s1);
+    System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
+    assertEquals(
+        "Query in a named field does not result in highlighting when that field isn't in the query",
+        s1, highlightField(q, FIELD_NAME, s1));
+  }
+
+  /**
+   * This method intended for use with <tt>testHighlightingWithDefaultField()</tt>
+   */
+  private static String highlightField(Query query, String fieldName, String text)
+      throws IOException {
+    CachingTokenFilter tokenStream = new CachingTokenFilter(new StandardAnalyzer().tokenStream(
+        fieldName, new StringReader(text)));
+    // Assuming "<B>", "</B>" used to highlight
+    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
+    Highlighter highlighter = new Highlighter(formatter, new SpanScorer(query, fieldName,
+        tokenStream, FIELD_NAME));
+    highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));
+    tokenStream.reset();
+    String rv = highlighter.getBestFragments(tokenStream, text, 1, "(FIELD TEXT TRUNCATED)");
+    return rv.length() == 0 ? text : rv;
+  }
+
+  public void testSimpleSpanHighlighter() throws Exception {
+    doSearching("Kennedy");
+
+    int maxNumFragmentsRequired = 2;
+
+    for (int i = 0; i < hits.length(); i++) {
+      String text = hits.doc(i).get(FIELD_NAME);
+      CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME,
+          new StringReader(text)));
+      Highlighter highlighter = new Highlighter(new SpanScorer(query, FIELD_NAME, tokenStream));
+      highlighter.setTextFragmenter(new SimpleFragmenter(40));
+      tokenStream.reset();
+
+      String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
+          "...");
+      System.out.println("\t" + result);
+    }
+
+    // Not sure we can assert anything here - just running to check we dont
+    // throw any exceptions
+  }
+
+  public void testSimpleSpanPhraseHighlighting() throws Exception {
+    doSearching("\"very long and contains\"");
+
+    int maxNumFragmentsRequired = 2;
+
+    for (int i = 0; i < hits.length(); i++) {
+      String text = hits.doc(i).get(FIELD_NAME);
+      CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME,
+          new StringReader(text)));
+      Highlighter highlighter = new Highlighter(this,
+          new SpanScorer(query, FIELD_NAME, tokenStream));
+      highlighter.setTextFragmenter(new SimpleFragmenter(40));
+      tokenStream.reset();
+
+      String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
+          "...");
+      System.out.println("\t" + result);
+    }
+
+    assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+        numHighlights == 3);
+  }
+
+  public void testSimpleSpanPhraseHighlighting2() throws Exception {
+    doSearching("\"text piece long\"~5");
+
+    int maxNumFragmentsRequired = 2;
+
+    for (int i = 0; i < hits.length(); i++) {
+      String text = hits.doc(i).get(FIELD_NAME);
+      CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME,
+          new StringReader(text)));
+      Highlighter highlighter = new Highlighter(this,
+          new SpanScorer(query, FIELD_NAME, tokenStream));
+      highlighter.setTextFragmenter(new SimpleFragmenter(40));
+      tokenStream.reset();
+
+      String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
+          "...");
+      System.out.println("\t" + result);
+    }
+
+    assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+        numHighlights == 6);
+  }
+
+  public void testSimpleSpanPhraseHighlighting3() throws Exception {
+    doSearching("\"x y z\"");
+
+    int maxNumFragmentsRequired = 2;
+
+    for (int i = 0; i < hits.length(); i++) {
+      String text = hits.doc(i).get(FIELD_NAME);
+      CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME,
+          new StringReader(text)));
+      Highlighter highlighter = new Highlighter(this,
+          new SpanScorer(query, FIELD_NAME, tokenStream));
+      highlighter.setTextFragmenter(new SimpleFragmenter(40));
+      tokenStream.reset();
+
+      String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
+          "...");
+      System.out.println("\t" + result);
+
+      assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+          numHighlights == 3);
+    }
+  }
+
+  public void testSpanMultiPhraseQueryHighlighting() throws Exception {
+    MultiPhraseQuery mpq = new MultiPhraseQuery();
+
+    mpq.add(new Term[] { new Term(FIELD_NAME, "wordx"), new Term(FIELD_NAME, "wordb") });
+    mpq.add(new Term(FIELD_NAME, "wordy"));
+
+    doSearching(mpq);
+
+    final int maxNumFragmentsRequired = 2;
+    assertExpectedHighlightCount(maxNumFragmentsRequired, 6);
+  }
+
+  public void testSpanMultiPhraseQueryHighlightingWithGap() throws Exception {
+    MultiPhraseQuery mpq = new MultiPhraseQuery();
+
+    /*
+     * The toString of MultiPhraseQuery doesn't work so well with these
+     * out-of-order additions, but the Query itself seems to match accurately.
+     */
+
+    mpq.add(new Term[] { new Term(FIELD_NAME, "wordz") }, 2);
+    mpq.add(new Term[] { new Term(FIELD_NAME, "wordx") }, 0);
+
+    doSearching(mpq);
+
+    final int maxNumFragmentsRequired = 1;
+    final int expectedHighlights = 2;
+
+    assertExpectedHighlightCount(maxNumFragmentsRequired, expectedHighlights);
+  }
+
+  public void testNearSpanSimpleQuery() throws Exception {
+    doSearching(new SpanNearQuery(new SpanQuery[] {
+        new SpanTermQuery(new Term(FIELD_NAME, "beginning")),
+        new SpanTermQuery(new Term(FIELD_NAME, "kennedy")) }, 3, false));
+
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        mode = SPAN;
+        doStandardHighlights(analyzer, hits, query, HighlighterTest.this);
+      }
+    };
+
+    helper.run();
+
+    assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+        numHighlights == 2);
+  }
+
+  public void testSimpleHighlighter() throws Exception {
+    doSearching("Kennedy");
+    Highlighter highlighter = new Highlighter(new QueryScorer(query));
+    highlighter.setTextFragmenter(new SimpleFragmenter(40));
+    int maxNumFragmentsRequired = 2;
+    for (int i = 0; i < hits.length(); i++) {
+      String text = hits.doc(i).get(FIELD_NAME);
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
+
+      String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
+          "...");
+      System.out.println("\t" + result);
+    }
+    // Not sure we can assert anything here - just running to check we dont
+    // throw any exceptions
+  }
+
+  public void testSpanHighlighting() throws Exception {
+    Query query1 = new SpanNearQuery(new SpanQuery[] {
+        new SpanTermQuery(new Term(FIELD_NAME, "wordx")),
+        new SpanTermQuery(new Term(FIELD_NAME, "wordy")) }, 1, false);
+    Query query2 = new SpanNearQuery(new SpanQuery[] {
+        new SpanTermQuery(new Term(FIELD_NAME, "wordy")),
+        new SpanTermQuery(new Term(FIELD_NAME, "wordc")) }, 1, false);
+    BooleanQuery bquery = new BooleanQuery();
+    bquery.add(query1, Occur.SHOULD);
+    bquery.add(query2, Occur.SHOULD);
+    doSearching(bquery);
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        mode = SPAN;
+        doStandardHighlights(analyzer, hits, query, HighlighterTest.this);
+      }
+    };
+
+    helper.run();
+    assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+        numHighlights == 7);
+  }
+
+  public void testNotSpanSimpleQuery() throws Exception {
+    doSearching(new SpanNotQuery(new SpanNearQuery(new SpanQuery[] {
+        new SpanTermQuery(new Term(FIELD_NAME, "shot")),
+        new SpanTermQuery(new Term(FIELD_NAME, "kennedy")) }, 3, false), new SpanTermQuery(
+        new Term(FIELD_NAME, "john"))));
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        mode = SPAN;
+        doStandardHighlights(analyzer, hits, query, HighlighterTest.this);
+      }
+    };
+
+    helper.run();
+    assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+        numHighlights == 4);
+  }
+
+  public void testGetBestFragmentsSimpleQuery() throws Exception {
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        numHighlights = 0;
+        doSearching("Kennedy");
+        doStandardHighlights(analyzer, hits, query, HighlighterTest.this);
+        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+            numHighlights == 4);
+      }
+    };
+
+    helper.start();
+  }
+
+  public void testGetFuzzyFragments() throws Exception {
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        numHighlights = 0;
+        doSearching("Kinnedy~");
+        doStandardHighlights(analyzer, hits, query, HighlighterTest.this);
+        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+            numHighlights == 5);
+      }
+    };
+
+    helper.start();
+  }
+
+  public void testGetWildCardFragments() throws Exception {
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        numHighlights = 0;
+        doSearching("K?nnedy");
+        doStandardHighlights(analyzer, hits, query, HighlighterTest.this);
+        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+            numHighlights == 4);
+      }
+    };
+
+    helper.start();
+  }
+
+  public void testGetMidWildCardFragments() throws Exception {
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        numHighlights = 0;
+        doSearching("K*dy");
+        doStandardHighlights(analyzer, hits, query, HighlighterTest.this);
+        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+            numHighlights == 5);
+      }
+    };
+
+    helper.start();
+  }
+
+  public void testGetRangeFragments() throws Exception {
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        numHighlights = 0;
+        String queryString = FIELD_NAME + ":[kannedy TO kznnedy]";
+
+        // Need to explicitly set the QueryParser property to use RangeQuery
+        // rather
+        // than RangeFilters
+        QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
+        parser.setUseOldRangeQuery(true);
+        query = parser.parse(queryString);
+        doSearching(query);
+
+        doStandardHighlights(analyzer, hits, query, HighlighterTest.this);
+        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+            numHighlights == 5);
+      }
+    };
+
+    helper.start();
+  }
+
+  public void testGetConstantScoreRangeFragments() throws Exception {
+
+    numHighlights = 0;
+    String queryString = FIELD_NAME + ":[kannedy TO kznnedy]";
+
+    // Need to explicitly set the QueryParser property to use RangeQuery
+    // rather
+    // than RangeFilters
+    QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
+    // parser.setUseOldRangeQuery(true);
+    query = parser.parse(queryString);
+
+    searcher = new IndexSearcher(ramDir);
+    // can't rewrite ConstantScoreRangeQuery if you want to highlight it -
+    // it rewrites to ConstantScoreQuery which cannot be highlighted
+    // query = unReWrittenQuery.rewrite(reader);
+    System.out.println("Searching for: " + query.toString(FIELD_NAME));
+    hits = searcher.search(query);
+
+    for (int i = 0; i < hits.length(); i++) {
+      String text = hits.doc(i).get(HighlighterTest.FIELD_NAME);
+      int maxNumFragmentsRequired = 2;
+      String fragmentSeparator = "...";
+      SpanScorer scorer = null;
+      TokenStream tokenStream = null;
+
+      tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME,
+          new StringReader(text)));
+      scorer = new SpanScorer(query, HighlighterTest.FIELD_NAME, (CachingTokenFilter) tokenStream);
+      scorer.setHighlightCnstScrRngQuery(true);
+
+      Highlighter highlighter = new Highlighter(this, scorer);
+
+      ((CachingTokenFilter) tokenStream).reset();
+
+      highlighter.setTextFragmenter(new SimpleFragmenter(20));
+
+      String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
+          fragmentSeparator);
+      System.out.println("\t" + result);
+    }
+    assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+        numHighlights == 5);
+  }
+
+  public void testGetBestFragmentsPhrase() throws Exception {
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        numHighlights = 0;
+        doSearching("\"John Kennedy\"");
+        doStandardHighlights(analyzer, hits, query, HighlighterTest.this);
+        // Currently highlights "John" and "Kennedy" separately
+        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+            numHighlights == 2);
+      }
+    };
+
+    helper.start();
+  }
+
+  public void testGetBestFragmentsSpan() throws Exception {
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        numHighlights = 0;
+        SpanQuery clauses[] = { new SpanTermQuery(new Term("contents", "john")),
+            new SpanTermQuery(new Term("contents", "kennedy")), };
+
+        SpanNearQuery snq = new SpanNearQuery(clauses, 1, true);
+        doSearching(snq);
+        doStandardHighlights(analyzer, hits, query, HighlighterTest.this);
+        // Currently highlights "John" and "Kennedy" separately
+        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+            numHighlights == 2);
+      }
+    };
+
+    helper.start();
+  }
+
+  public void testOffByOne() throws Exception {
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        TermQuery query = new TermQuery(new Term("data", "help"));
+        Highlighter hg = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
+        hg.setTextFragmenter(new NullFragmenter());
+
+        String match = null;
+        match = hg.getBestFragment(new StandardAnalyzer(), "data", "help me [54-65]");
+        assertEquals("<B>help</B> me [54-65]", match);
+
+      }
+    };
+
+    helper.start();
+  }
+
+  public void testGetBestFragmentsFilteredQuery() throws Exception {
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        numHighlights = 0;
+        RangeFilter rf = new RangeFilter("contents", "john", "john", true, true);
+        SpanQuery clauses[] = { new SpanTermQuery(new Term("contents", "john")),
+            new SpanTermQuery(new Term("contents", "kennedy")), };
+        SpanNearQuery snq = new SpanNearQuery(clauses, 1, true);
+        FilteredQuery fq = new FilteredQuery(snq, rf);
+
+        doSearching(fq);
+        doStandardHighlights(analyzer, hits, query, HighlighterTest.this);
+        // Currently highlights "John" and "Kennedy" separately
+        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+            numHighlights == 2);
+      }
+    };
+
+    helper.start();
+  }
+
+  public void testGetBestFragmentsFilteredPhraseQuery() throws Exception {
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        numHighlights = 0;
+        RangeFilter rf = new RangeFilter("contents", "john", "john", true, true);
+        PhraseQuery pq = new PhraseQuery();
+        pq.add(new Term("contents", "john"));
+        pq.add(new Term("contents", "kennedy"));
+        FilteredQuery fq = new FilteredQuery(pq, rf);
+
+        doSearching(fq);
+        doStandardHighlights(analyzer, hits, query, HighlighterTest.this);
+        // Currently highlights "John" and "Kennedy" separately
+        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+            numHighlights == 2);
+      }
+    };
+
+    helper.start();
+  }
+
+  public void testGetBestFragmentsMultiTerm() throws Exception {
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        numHighlights = 0;
+        doSearching("John Kenn*");
+        doStandardHighlights(analyzer, hits, query, HighlighterTest.this);
+        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+            numHighlights == 5);
+      }
+    };
+
+    helper.start();
+  }
+
+  public void testGetBestFragmentsWithOr() throws Exception {
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        numHighlights = 0;
+        doSearching("JFK OR Kennedy");
+        doStandardHighlights(analyzer, hits, query, HighlighterTest.this);
+        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+            numHighlights == 5);
+      }
+    };
+    helper.start();
+  }
+
+  public void testGetBestSingleFragment() throws Exception {
+
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        doSearching("Kennedy");
+        numHighlights = 0;
+        for (int i = 0; i < hits.length(); i++) {
+          String text = hits.doc(i).get(FIELD_NAME);
+          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
+
+          Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
+              HighlighterTest.this);
+          highlighter.setTextFragmenter(new SimpleFragmenter(40));
+          String result = highlighter.getBestFragment(tokenStream, text);
+          System.out.println("\t" + result);
+        }
+        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+            numHighlights == 4);
+
+        numHighlights = 0;
+        for (int i = 0; i < hits.length(); i++) {
+          String text = hits.doc(i).get(FIELD_NAME);
+          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
+          Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
+              HighlighterTest.this);
+          highlighter.getBestFragment(analyzer, FIELD_NAME, text);
+        }
+        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+            numHighlights == 4);
+
+        numHighlights = 0;
+        for (int i = 0; i < hits.length(); i++) {
+          String text = hits.doc(i).get(FIELD_NAME);
+
+          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
+          Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
+              HighlighterTest.this);
+          highlighter.getBestFragments(analyzer, FIELD_NAME, text, 10);
+        }
+        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+            numHighlights == 4);
+
+      }
+
+    };
+
+    helper.start();
+
+  }
+
+  public void testGetBestSingleFragmentWithWeights() throws Exception {
+
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        WeightedSpanTerm[] wTerms = new WeightedSpanTerm[2];
+        wTerms[0] = new WeightedSpanTerm(10f, "hello");
+
+        List positionSpans = new ArrayList();
+        positionSpans.add(new PositionSpan(0, 0));
+        wTerms[0].addPositionSpans(positionSpans);
+
+        wTerms[1] = new WeightedSpanTerm(1f, "kennedy");
+        positionSpans = new ArrayList();
+        positionSpans.add(new PositionSpan(14, 14));
+        wTerms[1].addPositionSpans(positionSpans);
+
+        Highlighter highlighter = getHighlighter(wTerms, HighlighterTest.this);// new
+        // Highlighter(new
+        // QueryScorer(wTerms));
+        TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
+        highlighter.setTextFragmenter(new SimpleFragmenter(2));
+
+        String result = highlighter.getBestFragment(tokenStream, texts[0]).trim();
+        assertTrue("Failed to find best section using weighted terms. Found: [" + result + "]",
+            "<B>Hello</B>".equals(result));
+
+        // readjust weights
+        wTerms[1].setWeight(50f);
+        tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
+        highlighter = getHighlighter(wTerms, HighlighterTest.this);
+        highlighter.setTextFragmenter(new SimpleFragmenter(2));
+
+        result = highlighter.getBestFragment(tokenStream, texts[0]).trim();
+        assertTrue("Failed to find best section using weighted terms. Found: " + result,
+            "<B>kennedy</B>".equals(result));
+      }
+
+    };
+
+    helper.start();
+
+  }
+
+  // tests a "complex" analyzer that produces multiple
+  // overlapping tokens
+  public void testOverlapAnalyzer() throws Exception {
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        HashMap synonyms = new HashMap();
+        synonyms.put("football", "soccer,footie");
+        Analyzer analyzer = new SynonymAnalyzer(synonyms);
+        String srchkey = "football";
+
+        String s = "football-soccer in the euro 2004 footie competition";
+        QueryParser parser = new QueryParser("bookid", analyzer);
+        Query query = parser.parse(srchkey);
+
+        TokenStream tokenStream = analyzer.tokenStream(null, new StringReader(s));
+        Highlighter highlighter = getHighlighter(query, null, tokenStream, HighlighterTest.this);
+
+        // Get 3 best fragments and seperate with a "..."
+        tokenStream = analyzer.tokenStream(null, new StringReader(s));
+        String result = highlighter.getBestFragments(tokenStream, s, 3, "...");
+        String expectedResult = "<B>football</B>-<B>soccer</B> in the euro 2004 <B>footie</B> competition";
+        assertTrue("overlapping analyzer should handle highlights OK, expected:" + expectedResult
+            + " actual:" + result, expectedResult.equals(result));
+      }
+
+    };
+
+    helper.start();
+
+  }
+
+  public void testGetSimpleHighlight() throws Exception {
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        numHighlights = 0;
+        doSearching("Kennedy");
+        // new Highlighter(HighlighterTest.this, new QueryScorer(query));
+
+        for (int i = 0; i < hits.length(); i++) {
+          String text = hits.doc(i).get(FIELD_NAME);
+          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
+          Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
+              HighlighterTest.this);
+          String result = highlighter.getBestFragment(tokenStream, text);
+          System.out.println("\t" + result);
+        }
+        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+            numHighlights == 4);
+      }
+    };
+    helper.start();
+  }
+
+  public void testGetTextFragments() throws Exception {
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+
+        doSearching("Kennedy");
+
+        for (int i = 0; i < hits.length(); i++) {
+          String text = hits.doc(i).get(FIELD_NAME);
+          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
+
+          Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
+              HighlighterTest.this);// new Highlighter(this, new
+          // QueryScorer(query));
+          highlighter.setTextFragmenter(new SimpleFragmenter(20));
+          String stringResults[] = highlighter.getBestFragments(tokenStream, text, 10);
+
+          tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
+          TextFragment fragmentResults[] = highlighter.getBestTextFragments(tokenStream, text,
+              true, 10);
+
+          assertTrue("Failed to find correct number of text Fragments: " + fragmentResults.length
+              + " vs " + stringResults.length, fragmentResults.length == stringResults.length);
+          for (int j = 0; j < stringResults.length; j++) {
+            System.out.println(fragmentResults[j]);
+            assertTrue("Failed to find same text Fragments: " + fragmentResults[j] + " found",
+                fragmentResults[j].toString().equals(stringResults[j]));
+
+          }
+
+        }
+      }
+    };
+    helper.start();
+  }
+
+  public void testMaxSizeHighlight() throws Exception {
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        numHighlights = 0;
+        doSearching("meat");
+        TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
+        Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
+            HighlighterTest.this);// new Highlighter(this, new
+        // QueryScorer(query));
+        highlighter.setMaxDocBytesToAnalyze(30);
+
+        highlighter.getBestFragment(tokenStream, texts[0]);
+        assertTrue("Setting MaxDocBytesToAnalyze should have prevented "
+            + "us from finding matches for this record: " + numHighlights + " found",
+            numHighlights == 0);
+      }
+    };
+
+    helper.start();
+  }
+
+  public void testMaxSizeHighlightTruncates() throws Exception {
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        String goodWord = "goodtoken";
+        String stopWords[] = { "stoppedtoken" };
+
+        TermQuery query = new TermQuery(new Term("data", goodWord));
+
+        String match = null;
+        StringBuffer sb = new StringBuffer();
+        sb.append(goodWord);
+        for (int i = 0; i < 10000; i++) {
+          sb.append(" ");
+          sb.append(stopWords[0]);
+        }
+        SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
+        Highlighter hg = getHighlighter(query, "data", new StandardAnalyzer(stopWords).tokenStream(
+            "data", new StringReader(sb.toString())), fm);// new Highlighter(fm,
+        // new
+        // QueryScorer(query));
+        hg.setTextFragmenter(new NullFragmenter());
+        hg.setMaxDocBytesToAnalyze(100);
+        match = hg.getBestFragment(new StandardAnalyzer(stopWords), "data", sb.toString());
+        assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg
+            .getMaxDocBytesToAnalyze());
+
+        // add another tokenized word to the overrall length - but set way
+        // beyond
+        // the length of text under consideration (after a large slug of stop
+        // words
+        // + whitespace)
+        sb.append(" ");
+        sb.append(goodWord);
+        match = hg.getBestFragment(new StandardAnalyzer(stopWords), "data", sb.toString());
+        assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg
+            .getMaxDocBytesToAnalyze());
+      }
+    };
+
+    helper.start();
+
+  }
+
+  public void testUnRewrittenQuery() throws Exception {
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        numHighlights = 0;
+        // test to show how rewritten query can still be used
+        searcher = new IndexSearcher(ramDir);
+        Analyzer analyzer = new StandardAnalyzer();
+
+        QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
+        Query query = parser.parse("JF? or Kenned*");
+        System.out.println("Searching with primitive query");
+        // forget to set this and...
+        // query=query.rewrite(reader);
+        Hits hits = searcher.search(query);
+
+        // create an instance of the highlighter with the tags used to surround
+        // highlighted text
+        // QueryHighlightExtractor highlighter = new
+        // QueryHighlightExtractor(this,
+        // query, new StandardAnalyzer());
+
+        int maxNumFragmentsRequired = 3;
+
+        for (int i = 0; i < hits.length(); i++) {
+          String text = hits.doc(i).get(FIELD_NAME);
+          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
+          Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
+              HighlighterTest.this);
+          highlighter.setTextFragmenter(new SimpleFragmenter(40));
+          String highlightedText = highlighter.getBestFragments(tokenStream, text,
+              maxNumFragmentsRequired, "...");
+          System.out.println(highlightedText);
+        }
+        // We expect to have zero highlights if the query is multi-terms and is
+        // not
+        // rewritten!
+        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+            numHighlights == 0);
+      }
+    };
+
+    helper.start();
+  }
+
+  public void testNoFragments() throws Exception {
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        doSearching("AnInvalidQueryWhichShouldYieldNoResults");
+
+        for (int i = 0; i < texts.length; i++) {
+          String text = texts[i];
+          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
+          Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
+              HighlighterTest.this);
+          String result = highlighter.getBestFragment(tokenStream, text);
+          assertNull("The highlight result should be null for text with no query terms", result);
+        }
+      }
+    };
+
+    helper.start();
+  }
+
+  /**
+   * Demonstrates creation of an XHTML compliant doc using new encoding facilities.
+   * 
+   * @throws Exception
+   */
+  public void testEncoding() throws Exception {
+
+    String rawDocContent = "\"Smith & sons' prices < 3 and >4\" claims article";
+    // run the highlighter on the raw content (scorer does not score any tokens
+    // for
+    // highlighting but scores a single fragment for selection
+    Highlighter highlighter = new Highlighter(this, new SimpleHTMLEncoder(), new Scorer() {
+      public void startFragment(TextFragment newFragment) {
+      }
+
+      public float getTokenScore(Token token) {
+        return 0;
+      }
+
+      public float getFragmentScore() {
+        return 1;
+      }
+    });
+    highlighter.setTextFragmenter(new SimpleFragmenter(2000));
+    TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(rawDocContent));
+
+    String encodedSnippet = highlighter.getBestFragments(tokenStream, rawDocContent, 1, "");
+    // An ugly bit of XML creation:
+    String xhtml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<!DOCTYPE html\n"
+        + "PUBLIC \"//W3C//DTD XHTML 1.0 Transitional//EN\"\n"
+        + "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
+        + "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\n"
+        + "<head>\n" + "<title>My Test HTML Document</title>\n" + "</head>\n" + "<body>\n" + "<h2>"
+        + encodedSnippet + "</h2>\n" + "</body>\n" + "</html>";
+    // now an ugly built of XML parsing to test the snippet is encoded OK
+    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+    DocumentBuilder db = dbf.newDocumentBuilder();
+    org.w3c.dom.Document doc = db.parse(new ByteArrayInputStream(xhtml.getBytes()));
+    Element root = doc.getDocumentElement();
+    NodeList nodes = root.getElementsByTagName("body");
+    Element body = (Element) nodes.item(0);
+    nodes = body.getElementsByTagName("h2");
+    Element h2 = (Element) nodes.item(0);
+    String decodedSnippet = h2.getFirstChild().getNodeValue();
+    assertEquals("XHTML Encoding should have worked:", rawDocContent, decodedSnippet);
+  }
+
+  public void testMultiSearcher() throws Exception {
+    // setup index 1
+    RAMDirectory ramDir1 = new RAMDirectory();
+    IndexWriter writer1 = new IndexWriter(ramDir1, new StandardAnalyzer(), true);
+    Document d = new Document();
+    Field f = new Field(FIELD_NAME, "multiOne", Field.Store.YES, Field.Index.TOKENIZED);
+    d.add(f);
+    writer1.addDocument(d);
+    writer1.optimize();
+    writer1.close();
+    IndexReader reader1 = IndexReader.open(ramDir1);
+
+    // setup index 2
+    RAMDirectory ramDir2 = new RAMDirectory();
+    IndexWriter writer2 = new IndexWriter(ramDir2, new StandardAnalyzer(), true);
+    d = new Document();
+    f = new Field(FIELD_NAME, "multiTwo", Field.Store.YES, Field.Index.TOKENIZED);
+    d.add(f);
+    writer2.addDocument(d);
+    writer2.optimize();
+    writer2.close();
+    IndexReader reader2 = IndexReader.open(ramDir2);
+
+    IndexSearcher searchers[] = new IndexSearcher[2];
+    searchers[0] = new IndexSearcher(ramDir1);
+    searchers[1] = new IndexSearcher(ramDir2);
+    MultiSearcher multiSearcher = new MultiSearcher(searchers);
+    QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
+    query = parser.parse("multi*");
+    System.out.println("Searching for: " + query.toString(FIELD_NAME));
+    // at this point the multisearcher calls combine(query[])
+    hits = multiSearcher.search(query);
+
+    // query = QueryParser.parse("multi*", FIELD_NAME, new StandardAnalyzer());
+    Query expandedQueries[] = new Query[2];
+    expandedQueries[0] = query.rewrite(reader1);
+    expandedQueries[1] = query.rewrite(reader2);
+    query = query.combine(expandedQueries);
+
+    // create an instance of the highlighter with the tags used to surround
+    // highlighted text
+    Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
+
+    for (int i = 0; i < hits.length(); i++) {
+      String text = hits.doc(i).get(FIELD_NAME);
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
+      String highlightedText = highlighter.getBestFragment(tokenStream, text);
+      System.out.println(highlightedText);
+    }
+    assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+        numHighlights == 2);
+
+  }
+
+  public void testFieldSpecificHighlighting() throws Exception {
+    TestHighlightRunner helper = new TestHighlightRunner() {
+
+      public void run() throws Exception {
+        String docMainText = "fred is one of the people";
+        QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
+        Query query = parser.parse("fred category:people");
+
+        // highlighting respects fieldnames used in query
+
+        Scorer fieldSpecificScorer = null;
+        if (mode == this.SPAN) {
+          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(docMainText));
+          CachingTokenFilter ctf = new CachingTokenFilter(tokenStream);
+          fieldSpecificScorer = new SpanScorer(query, FIELD_NAME, ctf);
+        } else if (mode == this.STANDARD) {
+          fieldSpecificScorer = new QueryScorer(query, "contents");
+        }
+        Highlighter fieldSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(),
+            fieldSpecificScorer);
+        fieldSpecificHighlighter.setTextFragmenter(new NullFragmenter());
+        String result = fieldSpecificHighlighter.getBestFragment(analyzer, FIELD_NAME, docMainText);
+        assertEquals("Should match", result, "<B>fred</B> is one of the people");
+
+        // highlighting does not respect fieldnames used in query
+        Scorer fieldInSpecificScorer = null;
+        if (mode == this.SPAN) {
+          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(docMainText));
+          CachingTokenFilter ctf = new CachingTokenFilter(tokenStream);
+          fieldInSpecificScorer = new SpanScorer(query, null, ctf);
+        } else if (mode == this.STANDARD) {
+          fieldInSpecificScorer = new QueryScorer(query);
+        }
+
+        Highlighter fieldInSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(),
+            fieldInSpecificScorer);
+        fieldInSpecificHighlighter.setTextFragmenter(new NullFragmenter());
+        result = fieldInSpecificHighlighter.getBestFragment(analyzer, FIELD_NAME, docMainText);
+        assertEquals("Should match", result, "<B>fred</B> is one of the <B>people</B>");
+
+        reader.close();
+      }
+    };
+
+    helper.start();
+
+  }
 
   protected TokenStream getTS2() {
-    //String s = "Hi-Speed10 foo";
+    // String s = "Hi-Speed10 foo";
     return new TokenStream() {
       Iterator iter;
       List lst;
       {
         lst = new ArrayList();
         Token t;
-        t = new Token("hi",0,2);
+        t = new Token("hi", 0, 2);
         lst.add(t);
-        t = new Token("hispeed",0,8);
+        t = new Token("hispeed", 0, 8);
         lst.add(t);
-        t = new Token("speed",3,8);
+        t = new Token("speed", 3, 8);
         t.setPositionIncrement(0);
         lst.add(t);
-        t = new Token("10",8,10);
+        t = new Token("10", 8, 10);
         lst.add(t);
-        t = new Token("foo",11,14);
+        t = new Token("foo", 11, 14);
         lst.add(t);
         iter = lst.iterator();
       }
+
       public Token next() throws IOException {
-        return iter.hasNext() ? (Token)iter.next() : null;
+        return iter.hasNext() ? (Token) iter.next() : null;
       }
     };
   }
 
   // same token-stream as above, but the bigger token comes first this time
   protected TokenStream getTS2a() {
-    //String s = "Hi-Speed10 foo";
+    // String s = "Hi-Speed10 foo";
     return new TokenStream() {
       Iterator iter;
       List lst;
       {
         lst = new ArrayList();
         Token t;
-        t = new Token("hispeed",0,8);
+        t = new Token("hispeed", 0, 8);
         lst.add(t);
-        t = new Token("hi",0,2);
+        t = new Token("hi", 0, 2);
         t.setPositionIncrement(0);
         lst.add(t);
-        t = new Token("speed",3,8);
+        t = new Token("speed", 3, 8);
         lst.add(t);
-        t = new Token("10",8,10);
+        t = new Token("10", 8, 10);
         lst.add(t);
-        t = new Token("foo",11,14);
+        t = new Token("foo", 11, 14);
         lst.add(t);
         iter = lst.iterator();
       }
+
       public Token next() throws IOException {
-        return iter.hasNext() ? (Token)iter.next() : null;
+        return iter.hasNext() ? (Token) iter.next() : null;
       }
     };
   }
 
-  public void testOverlapAnalyzer2() throws Exception
-  {
+  public void testOverlapAnalyzer2() throws Exception {
+    TestHighlightRunner helper = new TestHighlightRunner() {
 
-    String s = "Hi-Speed10 foo";
-
-    Query query; Highlighter highlighter; String result;
-
-    query = new QueryParser("text",new WhitespaceAnalyzer()).parse("foo");
-    highlighter = new Highlighter(new QueryScorer(query));
-    result = highlighter.getBestFragments(getTS2(), s, 3, "...");
-    assertEquals("Hi-Speed10 <B>foo</B>",result);
-
-    query = new QueryParser("text",new WhitespaceAnalyzer()).parse("10");
-    highlighter = new Highlighter(new QueryScorer(query));
-    result = highlighter.getBestFragments(getTS2(), s, 3, "...");
-    assertEquals("Hi-Speed<B>10</B> foo",result);
-
-    query = new QueryParser("text",new WhitespaceAnalyzer()).parse("hi");
-    highlighter = new Highlighter(new QueryScorer(query));
-    result = highlighter.getBestFragments(getTS2(), s, 3, "...");
-    assertEquals("<B>Hi</B>-Speed10 foo",result);
-
-    query = new QueryParser("text",new WhitespaceAnalyzer()).parse("speed");
-    highlighter = new Highlighter(new QueryScorer(query));
-    result = highlighter.getBestFragments(getTS2(), s, 3, "...");
-    assertEquals("Hi-<B>Speed</B>10 foo",result);
-
-    query = new QueryParser("text",new WhitespaceAnalyzer()).parse("hispeed");
-    highlighter = new Highlighter(new QueryScorer(query));
-    result = highlighter.getBestFragments(getTS2(), s, 3, "...");
-    assertEquals("<B>Hi-Speed</B>10 foo",result);
-
-    query = new QueryParser("text",new WhitespaceAnalyzer()).parse("hi speed");
-    highlighter = new Highlighter(new QueryScorer(query));
-    result = highlighter.getBestFragments(getTS2(), s, 3, "...");
-    assertEquals("<B>Hi-Speed</B>10 foo",result);
-
-    /////////////////// same tests, just put the bigger overlapping token first
-        query = new QueryParser("text",new WhitespaceAnalyzer()).parse("foo");
-    highlighter = new Highlighter(new QueryScorer(query));
-    result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
-    assertEquals("Hi-Speed10 <B>foo</B>",result);
-
-    query = new QueryParser("text",new WhitespaceAnalyzer()).parse("10");
-    highlighter = new Highlighter(new QueryScorer(query));
-    result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
-    assertEquals("Hi-Speed<B>10</B> foo",result);
-
-    query = new QueryParser("text",new WhitespaceAnalyzer()).parse("hi");
-    highlighter = new Highlighter(new QueryScorer(query));
-    result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
-    assertEquals("<B>Hi</B>-Speed10 foo",result);
-
-    query = new QueryParser("text",new WhitespaceAnalyzer()).parse("speed");
-    highlighter = new Highlighter(new QueryScorer(query));
-    result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
-    assertEquals("Hi-<B>Speed</B>10 foo",result);
-
-    query = new QueryParser("text",new WhitespaceAnalyzer()).parse("hispeed");
-    highlighter = new Highlighter(new QueryScorer(query));
-    result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
-    assertEquals("<B>Hi-Speed</B>10 foo",result);
-
-    query = new QueryParser("text",new WhitespaceAnalyzer()).parse("hi speed");
-    highlighter = new Highlighter(new QueryScorer(query));
-    result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
-    assertEquals("<B>Hi-Speed</B>10 foo",result);
-  }
-
-
-/*
-
-	public void testBigramAnalyzer() throws IOException, ParseException
-	{
-		//test to ensure analyzers with none-consecutive start/end offsets
-		//dont double-highlight text
-		//setup index 1
-		RAMDirectory ramDir = new RAMDirectory();
-		Analyzer bigramAnalyzer=new CJKAnalyzer();
-		IndexWriter writer = new IndexWriter(ramDir,bigramAnalyzer , true);
-		Document d = new Document();
-		Field f = new Field(FIELD_NAME, "java abc def", true, true, true);
-		d.add(f);
-		writer.addDocument(d);
-		writer.close();
-		IndexReader reader = IndexReader.open(ramDir);
-
-		IndexSearcher searcher=new IndexSearcher(reader);
-		query = QueryParser.parse("abc", FIELD_NAME, bigramAnalyzer);
-		System.out.println("Searching for: " + query.toString(FIELD_NAME));
-		hits = searcher.search(query);
-
-		Highlighter highlighter =
-			new Highlighter(this,new QueryFragmentScorer(query));
-
-		for (int i = 0; i < hits.length(); i++)
-		{
-			String text = hits.doc(i).get(FIELD_NAME);
-			TokenStream tokenStream=bigramAnalyzer.tokenStream(FIELD_NAME,new StringReader(text));
-			String highlightedText = highlighter.getBestFragment(tokenStream,text);
-			System.out.println(highlightedText);
-		}
-
-	}
-*/
-
-
-	public String highlightTerm(String originalText , TokenGroup group)
-	{
-		if(group.getTotalScore()<=0)
-		{
-			return originalText;
-		}
-		numHighlights++; //update stats used in assertions
-		return "<b>" + originalText + "</b>";
-	}
-	
-	public void doSearching(String queryString) throws Exception
-	{
-		QueryParser parser=new QueryParser(FIELD_NAME, new StandardAnalyzer());
-		query = parser.parse(queryString);
-		doSearching(query);
-	}
-	public void doSearching(Query unReWrittenQuery) throws Exception
-	{
-		searcher = new IndexSearcher(ramDir);
-		//for any multi-term queries to work (prefix, wildcard, range,fuzzy etc) you must use a rewritten query!
-		query=unReWrittenQuery.rewrite(reader);
-		System.out.println("Searching for: " + query.toString(FIELD_NAME));
-		hits = searcher.search(query);
-	}	
-
-	void doStandardHighlights() throws Exception
-	{
-		Highlighter highlighter =new Highlighter(this,new QueryScorer(query));
-		highlighter.setTextFragmenter(new SimpleFragmenter(20));
-		for (int i = 0; i < hits.length(); i++)
-		{
-			String text = hits.doc(i).get(FIELD_NAME);
-			int maxNumFragmentsRequired = 2;
-			String fragmentSeparator = "...";
-			TokenStream tokenStream=analyzer.tokenStream(FIELD_NAME,new StringReader(text));
-
-			String result =
-				highlighter.getBestFragments(
-					tokenStream,
-					text,
-					maxNumFragmentsRequired,
-					fragmentSeparator);
-			System.out.println("\t" + result);
-		}
-	}
-
-	/*
-	 * @see TestCase#setUp()
-	 */
-	protected void setUp() throws Exception
-	{
-		ramDir = new RAMDirectory();
-		IndexWriter writer = new IndexWriter(ramDir, new StandardAnalyzer(), true);
-		for (int i = 0; i < texts.length; i++)
-		{
-			addDoc(writer, texts[i]);
-		}
-
-		writer.optimize();
-		writer.close();
-		reader = IndexReader.open(ramDir);
-		numHighlights = 0;
-	}
-
-	private void addDoc(IndexWriter writer, String text) throws IOException
-	{
-		Document d = new Document();
-		Field f = new Field(FIELD_NAME, text,Field.Store.YES, Field.Index.TOKENIZED);
-		d.add(f);
-		writer.addDocument(d);
-
-	}
-
-	/*
-	 * @see TestCase#tearDown()
-	 */
-	protected void tearDown() throws Exception
-	{
-		super.tearDown();
-	}
+      public void run() throws Exception {
+        String s = "Hi-Speed10 foo";
 
-}
+        Query query;
+        Highlighter highlighter;
+        String result;
+
+        query = new QueryParser("text", new WhitespaceAnalyzer()).parse("foo");
+        highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
+        result = highlighter.getBestFragments(getTS2(), s, 3, "...");
+        assertEquals("Hi-Speed10 <B>foo</B>", result);
+
+        query = new QueryParser("text", new WhitespaceAnalyzer()).parse("10");
+        highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
+        result = highlighter.getBestFragments(getTS2(), s, 3, "...");
+        assertEquals("Hi-Speed<B>10</B> foo", result);
+
+        query = new QueryParser("text", new WhitespaceAnalyzer()).parse("hi");
+        highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
+        result = highlighter.getBestFragments(getTS2(), s, 3, "...");
+        assertEquals("<B>Hi</B>-Speed10 foo", result);
+
+        query = new QueryParser("text", new WhitespaceAnalyzer()).parse("speed");
+        highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
+        result = highlighter.getBestFragments(getTS2(), s, 3, "...");
+        assertEquals("Hi-<B>Speed</B>10 foo", result);
+
+        query = new QueryParser("text", new WhitespaceAnalyzer()).parse("hispeed");
+        highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
+        result = highlighter.getBestFragments(getTS2(), s, 3, "...");
+        assertEquals("<B>Hi-Speed</B>10 foo", result);
+
+        query = new QueryParser("text", new WhitespaceAnalyzer()).parse("hi speed");
+        highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
+        result = highlighter.getBestFragments(getTS2(), s, 3, "...");
+        assertEquals("<B>Hi-Speed</B>10 foo", result);
+
+        // ///////////////// same tests, just put the bigger overlapping token
+        // first
+        query = new QueryParser("text", new WhitespaceAnalyzer()).parse("foo");
+        highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
+        result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
+        assertEquals("Hi-Speed10 <B>foo</B>", result);
+
+        query = new QueryParser("text", new WhitespaceAnalyzer()).parse("10");
+        highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
+        result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
+        assertEquals("Hi-Speed<B>10</B> foo", result);
+
+        query = new QueryParser("text", new WhitespaceAnalyzer()).parse("hi");
+        highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
+        result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
+        assertEquals("<B>Hi</B>-Speed10 foo", result);
+
+        query = new QueryParser("text", new WhitespaceAnalyzer()).parse("speed");
+        highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
+        result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
+        assertEquals("Hi-<B>Speed</B>10 foo", result);
+
+        query = new QueryParser("text", new WhitespaceAnalyzer()).parse("hispeed");
+        highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
+        result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
+        assertEquals("<B>Hi-Speed</B>10 foo", result);
+
+        query = new QueryParser("text", new WhitespaceAnalyzer()).parse("hi speed");
+        highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
+        result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
+        assertEquals("<B>Hi-Speed</B>10 foo", result);
+      }
+    };
+
+    helper.start();
+  }
+
+  /*
+   * 
+   * public void testBigramAnalyzer() throws IOException, ParseException {
+   * //test to ensure analyzers with none-consecutive start/end offsets //dont
+   * double-highlight text //setup index 1 RAMDirectory ramDir = new
+   * RAMDirectory(); Analyzer bigramAnalyzer=new CJKAnalyzer(); IndexWriter
+   * writer = new IndexWriter(ramDir,bigramAnalyzer , true); Document d = new
+   * Document(); Field f = new Field(FIELD_NAME, "java abc def", true, true,
+   * true); d.add(f); writer.addDocument(d); writer.close(); IndexReader reader =
+   * IndexReader.open(ramDir);
+   * 
+   * IndexSearcher searcher=new IndexSearcher(reader); query =
+   * QueryParser.parse("abc", FIELD_NAME, bigramAnalyzer);
+   * System.out.println("Searching for: " + query.toString(FIELD_NAME)); hits =
+   * searcher.search(query);
+   * 
+   * Highlighter highlighter = new Highlighter(this,new
+   * QueryFragmentScorer(query));
+   * 
+   * for (int i = 0; i < hits.length(); i++) { String text =
+   * hits.doc(i).get(FIELD_NAME); TokenStream
+   * tokenStream=bigramAnalyzer.tokenStream(FIELD_NAME,new StringReader(text));
+   * String highlightedText = highlighter.getBestFragment(tokenStream,text);
+   * System.out.println(highlightedText); } }
+   */
+
+  public String highlightTerm(String originalText, TokenGroup group) {
+    if (group.getTotalScore() <= 0) {
+      return originalText;
+    }
+    numHighlights++; // update stats used in assertions
+    return "<B>" + originalText + "</B>";
+  }
+
+  public void doSearching(String queryString) throws Exception {
+    QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
+    query = parser.parse(queryString);
+    doSearching(query);
+  }
+
+  public void doSearching(Query unReWrittenQuery) throws Exception {
+    searcher = new IndexSearcher(ramDir);
+    // for any multi-term queries to work (prefix, wildcard, range,fuzzy etc)
+    // you must use a rewritten query!
+    query = unReWrittenQuery.rewrite(reader);
+    System.out.println("Searching for: " + query.toString(FIELD_NAME));
+    hits = searcher.search(query);
+  }
+
+  public void assertExpectedHighlightCount(final int maxNumFragmentsRequired,
+      final int expectedHighlights) throws Exception {
+    for (int i = 0; i < hits.length(); i++) {
+      String text = hits.doc(i).get(FIELD_NAME);
+      CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME,
+          new StringReader(text)));
+      Highlighter highlighter = new Highlighter(this,
+          new SpanScorer(query, FIELD_NAME, tokenStream));
+      highlighter.setTextFragmenter(new SimpleFragmenter(40));
+      tokenStream.reset();
+
+      String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
+          "...");
+      System.out.println("\t" + result);
+
+      assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+          numHighlights == expectedHighlights);
+    }
+  }
 
+  /*
+   * @see TestCase#setUp()
+   */
+  protected void setUp() throws Exception {
+    ramDir = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(ramDir, new StandardAnalyzer(), true);
+    for (int i = 0; i < texts.length; i++) {
+      addDoc(writer, texts[i]);
+    }
+
+    writer.optimize();
+    writer.close();
+    reader = IndexReader.open(ramDir);
+    numHighlights = 0;
+  }
+
+  private void addDoc(IndexWriter writer, String text) throws IOException {
+    Document d = new Document();
+    Field f = new Field(FIELD_NAME, text, Field.Store.YES, Field.Index.TOKENIZED);
+    d.add(f);
+    writer.addDocument(d);
+
+  }
+
+  /*
+   * @see TestCase#tearDown()
+   */
+  protected void tearDown() throws Exception {
+    super.tearDown();
+  }
+
+}
 
-//===================================================================
-//========== BEGIN TEST SUPPORTING CLASSES
-//========== THESE LOOK LIKE, WITH SOME MORE EFFORT THESE COULD BE
-//========== MADE MORE GENERALLY USEFUL.
+// ===================================================================
+// ========== BEGIN TEST SUPPORTING CLASSES
+// ========== THESE LOOK LIKE, WITH SOME MORE EFFORT THESE COULD BE
+// ========== MADE MORE GENERALLY USEFUL.
 // TODO - make synonyms all interchangeable with each other and produce
 // a version that does hyponyms - the "is a specialised type of ...."
 // so that car = audi, bmw and volkswagen but bmw != audi so different
 // behaviour to synonyms
-//===================================================================
+// ===================================================================
+
+class SynonymAnalyzer extends Analyzer {
+  private Map synonyms;
 
-class SynonymAnalyzer extends Analyzer
-{
-	private Map synonyms;
-
-	public SynonymAnalyzer(Map synonyms)
-	{
-		this.synonyms = synonyms;
-	}
-
-	/* (non-Javadoc)
-	 * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader)
-	 */
-	public TokenStream tokenStream(String arg0, Reader arg1)
-	{
-		return new SynonymTokenizer(new LowerCaseTokenizer(arg1), synonyms);
-	}
+  public SynonymAnalyzer(Map synonyms) {
+    this.synonyms = synonyms;
+  }
+
+  /*
+   * (non-Javadoc)
+   * 
+   * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String,
+   *      java.io.Reader)
+   */
+  public TokenStream tokenStream(String arg0, Reader arg1) {
+    return new SynonymTokenizer(new LowerCaseTokenizer(arg1), synonyms);
+  }
 }
 
 /**
  * Expands a token stream with synonyms (TODO - make the synonyms analyzed by choice of analyzer)
+ * 
  * @author MAHarwood
  */
-class SynonymTokenizer extends TokenStream
-{
-	private TokenStream realStream;
-	private Token currentRealToken = null;
-	private Map synonyms;
-	StringTokenizer st = null;
-	public SynonymTokenizer(TokenStream realStream, Map synonyms)
-	{
-		this.realStream = realStream;
-		this.synonyms = synonyms;
-	}
-	public Token next() throws IOException
-	{
-		if (currentRealToken == null)
-		{
-			Token nextRealToken = realStream.next();
-			if (nextRealToken == null)
-			{
-				return null;
-			}
-			String expansions = (String) synonyms.get(nextRealToken.termText());
-			if (expansions == null)
-			{
-				return nextRealToken;
-			}
-			st = new StringTokenizer(expansions, ",");
-			if (st.hasMoreTokens())
-			{
-				currentRealToken = nextRealToken;
-			}
-			return currentRealToken;
-		}
-		else
-		{
-			String nextExpandedValue = st.nextToken();
-			Token expandedToken =
-				new Token(
-					nextExpandedValue,
-					currentRealToken.startOffset(),
-					currentRealToken.endOffset());
-			expandedToken.setPositionIncrement(0);
-			if (!st.hasMoreTokens())
-			{
-				currentRealToken = null;
-				st = null;
-			}
-			return expandedToken;
-		}
-	}
+class SynonymTokenizer extends TokenStream {
+  private TokenStream realStream;
+  private Token currentRealToken = null;
+  private Map synonyms;
+  StringTokenizer st = null;
+
+  public SynonymTokenizer(TokenStream realStream, Map synonyms) {
+    this.realStream = realStream;
+    this.synonyms = synonyms;
+  }
 
-}
+  public Token next() throws IOException {
+    if (currentRealToken == null) {
+      Token nextRealToken = realStream.next();
+      if (nextRealToken == null) {
+        return null;
+      }
+      String expansions = (String) synonyms.get(nextRealToken.termText());
+      if (expansions == null) {
+        return nextRealToken;
+      }
+      st = new StringTokenizer(expansions, ",");
+      if (st.hasMoreTokens()) {
+        currentRealToken = nextRealToken;
+      }
+      return currentRealToken;
+    } else {
+      String nextExpandedValue = st.nextToken();
+      Token expandedToken = new Token(nextExpandedValue, currentRealToken.startOffset(),
+          currentRealToken.endOffset());
+      expandedToken.setPositionIncrement(0);
+      if (!st.hasMoreTokens()) {
+        currentRealToken = null;
+        st = null;
+      }
+      return expandedToken;
+    }
+  }
 
+  static abstract class TestHighlightRunner {
+    static final int STANDARD = 0;
+    static final int SPAN = 1;
+    int mode = STANDARD;
+
+    public Highlighter getHighlighter(Query query, String fieldName, TokenStream stream,
+        Formatter formatter) {
+      if (mode == STANDARD) {
+        return new Highlighter(formatter, new QueryScorer(query));
+      } else if (mode == SPAN) {
+        CachingTokenFilter tokenStream = new CachingTokenFilter(stream);
+        Highlighter highlighter;
+        try {
+          highlighter = new Highlighter(formatter, new SpanScorer(query, fieldName, tokenStream));
+          tokenStream.reset();
+        } catch (IOException e) {
+          throw new RuntimeException(e);
+        }
+
+        return highlighter;
+      } else {
+        throw new RuntimeException("Unknown highlight mode");
+      }
+    }
+
+    Highlighter getHighlighter(WeightedTerm[] weightedTerms, Formatter formatter) {
+      if (mode == STANDARD) {
+        return new Highlighter(formatter, new QueryScorer(weightedTerms));
+      } else if (mode == SPAN) {
+        Highlighter highlighter;
+
+        highlighter = new Highlighter(formatter, new SpanScorer((WeightedSpanTerm[]) weightedTerms));
+
+        return highlighter;
+      } else {
+        throw new RuntimeException("Unknown highlight mode");
+      }
+    }
 
+    void doStandardHighlights(Analyzer analyzer, Hits hits, Query query, Formatter formatter)
+        throws Exception {
+
+      for (int i = 0; i < hits.length(); i++) {
+        String text = hits.doc(i).get(HighlighterTest.FIELD_NAME);
+        int maxNumFragmentsRequired = 2;
+        String fragmentSeparator = "...";
+        Scorer scorer = null;
+        TokenStream tokenStream = null;
+        if (mode == SPAN) {
+          tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME,
+              new StringReader(text)));
+          scorer = new SpanScorer(query, HighlighterTest.FIELD_NAME,
+              (CachingTokenFilter) tokenStream);
+        } else if (mode == STANDARD) {
+          scorer = new QueryScorer(query);
+          tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
+        }
+        Highlighter highlighter = new Highlighter(formatter, scorer);
+        if (mode == SPAN) {
+          ((CachingTokenFilter) tokenStream).reset();
+        }
+        highlighter.setTextFragmenter(new SimpleFragmenter(20));
+
+        String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
+            fragmentSeparator);
+        System.out.println("\t" + result);
+      }
+    }
+
+    abstract void run() throws Exception;
+
+    void start() throws Exception {
+      System.out.println("Run standard");
+      run();
+      System.out.println("Run span");
+      mode = SPAN;
+      run();
+    }
+  }
+}



Mime
View raw message