lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mharw...@apache.org
Subject svn commit: r421413 - in /lucene/java/trunk/contrib/xml-query-parser/src: java/org/apache/lucene/xmlparser/builders/LikeThisQueryBuilder.java test/org/apache/lucene/xmlparser/LikeThisQuery.xml
Date Wed, 12 Jul 2006 21:09:16 GMT
Author: mharwood
Date: Wed Jul 12 14:09:15 2006
New Revision: 421413

URL: http://svn.apache.org/viewvc?rev=421413&view=rev
Log:
Exposed support for the stop words facility through LikeThisQueryBuilder and updated the example
LikeThisQuery.xml to illustrate its usage

Modified:
    lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/LikeThisQueryBuilder.java
    lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/LikeThisQuery.xml

Modified: lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/LikeThisQueryBuilder.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/LikeThisQueryBuilder.java?rev=421413&r1=421412&r2=421413&view=diff
==============================================================================
--- lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/LikeThisQueryBuilder.java
(original)
+++ lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/LikeThisQueryBuilder.java
Wed Jul 12 14:09:15 2006
@@ -3,7 +3,14 @@
  */
 package org.apache.lucene.xmlparser.builders;
 
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashSet;
+import java.util.Set;
+
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.search.similar.MoreLikeThisQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.xmlparser.DOMUtils;
@@ -43,10 +50,41 @@
 				fields[i]=fields[i].trim();
 			}
 		}
+		
+		//Parse any "stopWords" attribute
+		//TODO MoreLikeThis should ideally support per-field stopWords lists - until then
+		//the stop words are analyzed once per field to build a single multi-field compatible stop list
+		String stopWords=e.getAttribute("stopWords");
+		Set stopWordsSet=null;
+		if((stopWords!=null)&&(fields!=null))
+		{
+		    stopWordsSet=new HashSet();
+		    for (int i = 0; i < fields.length; i++)
+            {
+                TokenStream ts = analyzer.tokenStream(fields[i],new StringReader(stopWords));
+                try
+                {
+	                Token stopToken=ts.next();
+	                while(stopToken!=null)
+	                {
+	                    stopWordsSet.add(stopToken.termText());
+	                    stopToken=ts.next();
+	                }
+                }
+                catch(IOException ioe)
+                {
+                    throw new ParserException("IoException parsing stop words list in "
+                            +getClass().getName()+":"+ioe.getLocalizedMessage());
+                }
+            }
+		}
+		
+		
 		MoreLikeThisQuery mlt=new MoreLikeThisQuery(DOMUtils.getText(e),fields,analyzer);
 		mlt.setMaxQueryTerms(DOMUtils.getAttribute(e,"maxQueryTerms",defaultMaxQueryTerms));
 		mlt.setMinTermFrequency(DOMUtils.getAttribute(e,"minTermFrequency",defaultMinTermFrequency));
 		mlt.setPercentTermsToMatch(DOMUtils.getAttribute(e,"percentTermsToMatch",defaultPercentTermsToMatch)/100);
+		mlt.setStopWords(stopWordsSet);
 
 		mlt.setBoost(DOMUtils.getAttribute(e,"boost",1.0f));
 

Modified: lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/LikeThisQuery.xml
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/LikeThisQuery.xml?rev=421413&r1=421412&r2=421413&view=diff
==============================================================================
--- lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/LikeThisQuery.xml
(original)
+++ lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/LikeThisQuery.xml
Wed Jul 12 14:09:15 2006
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<LikeThisQuery percentTermsToMatch="5">
+<LikeThisQuery percentTermsToMatch="5" stopWords="Reuter">
 IRAQI TROOPS REPORTED PUSHING BACK IRANIANS Iraq said today its troops were pushing Iranian
forces out of 
 	positions they had initially occupied when they launched a new offensive near the southern
port of 
 	Basra early yesterday.     A High Command communique said Iraqi troops had won a significant
victory 



Mime
View raw message