lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gsing...@apache.org
Subject svn commit: r614884 - /lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
Date Thu, 24 Jan 2008 14:36:49 GMT
Author: gsingers
Date: Thu Jan 24 06:36:46 2008
New Revision: 614884

URL: http://svn.apache.org/viewvc?rev=614884&view=rev
Log:
LUCENE-1127: added a couple of convenience methods to TokenSources

Modified:
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java

Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java?rev=614884&r1=614883&r2=614884&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
(original)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
Thu Jan 24 06:36:46 2008
@@ -43,6 +43,36 @@
  */
 public class TokenSources
 {
+  /**
+   * A convenience method that tries to first get a TermPositionVector for the specified
docId, then falls back to
+   * using the passed in {@link org.apache.lucene.document.Document} to retrieve the TokenStream.
 This is useful when
+   * you already have the document, but would prefer to use the vector first.
+   * @param reader The {@link org.apache.lucene.index.IndexReader} to use to try and get
the vector from
+   * @param docId The docId to retrieve.
+   * @param field The field to retrieve on the document
+   * @param doc The document to fall back on
+   * @param analyzer The analyzer to use for creating the TokenStream if the vector doesn't
exist
+   * @return The {@link org.apache.lucene.analysis.TokenStream} for the {@link org.apache.lucene.document.Fieldable}
on the {@link org.apache.lucene.document.Document}
+   * @throws IOException if there was an error loading
+   */
+  public static TokenStream getAnyTokenStream(IndexReader reader, int docId, String field,
Document doc, Analyzer analyzer) throws IOException{
+    TokenStream ts=null;
+
+		TermFreqVector tfv=(TermFreqVector) reader.getTermFreqVector(docId,field);
+		if(tfv!=null)
+		{
+		    if(tfv instanceof TermPositionVector)
+		    {
+		        ts=getTokenStream((TermPositionVector) tfv);
+		    }
+		}
+		//No token info stored so fall back to analyzing raw content
+		if(ts==null)
+		{
+		    ts=getTokenStream(doc,field,analyzer);
+		}
+		return ts;
+  }
     /**
      * A convenience method that tries a number of approaches to getting a token stream.
      * The cost of finding there are no termVectors in the index is minimal (1000 invocations
still 
@@ -219,15 +249,21 @@
     //convenience method
     public static TokenStream getTokenStream(IndexReader reader,int docId, String field,Analyzer
analyzer) throws IOException
     {
-		Document doc=reader.document(docId);
-		String contents=doc.get(field);
+		  Document doc=reader.document(docId);
+		  return getTokenStream(doc, field, analyzer);
+    }
+    
+  public static TokenStream getTokenStream(Document doc, String field, Analyzer analyzer){
+    String contents=doc.get(field);
 		if(contents==null)
 		{
-		    throw new IllegalArgumentException("Field "+field +" in document #"+docId+ " is not
stored and cannot be analyzed");
+		    throw new IllegalArgumentException("Field "+field +" in document is not stored and
cannot be analyzed");
 		}
-        return analyzer.tokenStream(field,new StringReader(contents));
-    }
-    
-    
+        return getTokenStream(field, contents, analyzer);
+  }
+  //convenience method
+  public static TokenStream getTokenStream(String field, String contents, Analyzer analyzer){
+    return analyzer.tokenStream(field,new StringReader(contents));
+  }
 
 }



Mime
View raw message