lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Koji Sekiguchi <k...@r.email.ne.jp>
Subject Re: Highlighting large documents (Lucene 3.0.0)
Date Mon, 01 Mar 2010 16:17:22 GMT
-Arne- wrote:
> Hi Koji,
> thanks for your answer. Can you help me once again? What exactly am I
> supposed to do?
>
>   
Here is the concrete program I have in mind:

public class TestHighlightTruncatedSearchQuery {
 
  static Directory dir = new RAMDirectory();
  static Analyzer analyzer = new BiGramAnalyzer();
  static final String[] DOCS = {
    "import org.apache.lucene.analysis.Analyzer;",
    "import org.apache.lucene.analysis.TokenStream;",
    "import org.apache.lucene.analysis.ngram.NGramTokenizer;",
    "import org.apache.lucene.index.IndexWriter;",
    "import org.apache.lucene.index.IndexWriter.MaxFieldLength;",
    "import org.apache.lucene.store.Directory;",
    "import org.apache.lucene.store.RAMDirectory;"
  };
  static final String F = "f";

  public static void main(String[] args) throws Exception {
    makeIndex();
    searchIndex();
  }

  static void makeIndex() throws IOException {
    IndexWriter writer = new IndexWriter( dir, analyzer, true, 
MaxFieldLength.LIMITED );
    for( String value : DOCS ){
      Document doc = new Document();
      doc.add( new Field( F, value, Store.YES, Index.ANALYZED, 
TermVector.WITH_POSITIONS_OFFSETS ) );
      writer.addDocument( doc );
    }
    writer.close();
  }
 
  static void searchIndex() throws Exception {
    IndexSearcher searcher = new IndexSearcher( dir, true );
    IndexReader reader = searcher.getIndexReader();
    QueryParser parser = new QueryParser( F, analyzer );
    // use "Direct" rather than "Direct"
    Query query = parser.parse( "Direct" );
    FastVectorHighlighter h = new FastVectorHighlighter();
    FieldQuery fieldQuery = h.getFieldQuery( query );
    TopDocs docs = searcher.search( query, 10 );
    for( ScoreDoc scoreDoc : docs.scoreDocs ){
      String snippet = h.getBestFragment( fieldQuery, reader, 
scoreDoc.doc, F, 100 );
      System.out.println( scoreDoc.doc + " : " + snippet );
    }
    searcher.close();
  }
 
  static class BiGramAnalyzer extends Analyzer {
    public TokenStream tokenStream(String fieldName, Reader reader) {
      return new NGramTokenizer( reader, 2, 2 );
    }
  }
}


Koji

-- 
http://www.rondhuit.com/en/


---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org


Mime
View raw message