lucene-solr-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Rebecca Watson <bec.wat...@gmail.com>
Subject Re: Problem with Wildcard searches in Solr
Date Tue, 13 Jul 2010 08:18:50 GMT
Hi,

earlier this week i started messing with getting wildcard queries to
be analysed....

i've got some weird analysers doing stemming/lowercasing and writing
in the same rules into a custom queryparser didn't seem logical given
i just want the analysers to apply as they do at index time....

i came up with the hack below, which is just a modified version of
the LuceneQParserPlugin ie. the solr default one which creates
a SolrQueryParser query parser.

in the SolrQueryParser I override the "getWildcardQuery" method so
that I insert a call to my method - "myWildcardQuery".

the myWildcardQuery method converts the wildcard term into an analysed
version which it returns (and at least lowercases the term if analysis fails
for some reason).

the myWildcardQuery method is just pulling in code from
lucene's QueryParser.getFieldQuery -- so all this code is a magical giant
cut and paste job right now (which you'll see when you look at the lucene/solr
classes involved!)

you use this custom queryparser in the usual way i.e.
by registering the queryparser in the solrconfig.xml file:
<queryParser name="ilexirQparser"
class="com.ilexir.solr.search.ilexirQParserPlugin"/>
then call that queryparser in your request handler:
<requestHandler name="ilexir" class="solr.SearchHandler" default="true">
  <!-- query parameter defaults; defType selects the custom query parser
       registered under the name "ilexirQparser" -->
  <lst name="defaults">
    <str name="defType">ilexirQparser</str>
    <str name="echoParams">explicit</str>
    <int name="rows">10</int>
    <int name="start">0</int>
    <str name="fl">*,score</str>
    <str name="version">2.2</str>
    <str name="wt">standard</str>
    <str name="indent">on</str>
  </lst>
  <!-- search components listed under "last-components" -->
  <arr name="last-components">
    <str>spellcheck</str>
    <str>tvComponent</str>
  </arr>
</requestHandler>

i enable the leading wildcard queries using the reversedwildcard filter as per
previous email i.e. in index-time analyser add in:
<filter class="solr.ReversedWildcardFilterFactory" />
(not at query time) -- then the lucene query parser picks up the use of this
filter and allows leading wildcard queries.

of course, none of this is going to sort out trying to match against the query
"co?mput?r" because you've probably stemmed "computer" to "comput" or something
at index time -- but if you add in a copyfield to an extra field that
isn't stemmed, and then at query time query both the original + the
non-stemmed field (boost accordingly -- i.e. you might want to boost the
non-stemmed field higher!) you'll get the right match then :)

i'd be interested to hear from lucene/solr contributors why wildcards aren't
analysed in general anyway?

anyway hope that helps :)

bec

----------------------



import java.io.IOException;
import java.io.StringReader;
import java.util.Locale;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.WildcardQuery;
import org.apache.solr.analysis.ReversedWildcardFilterFactory;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.LuceneQParserPlugin;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.SolrQueryParser;

/**
 * Query parser plugin that hands out {@link ilexirQParser} instances.
 * Derived from LuceneQParserPlugin, i.e. the default query parser plugin
 * used by solr.
 *
 * @author bec
 */
public class ilexirQParserPlugin extends LuceneQParserPlugin {

	/**
	 * Plugin name constant. NOTE(review): the value "lucene" was presumably
	 * carried over from LuceneQParserPlugin -- confirm whether anything reads
	 * it.
	 */
	public static String NAME = "lucene";

	/**
	 * Does nothing: the supplied arguments are not read.
	 *
	 * @param args initialisation arguments (ignored)
	 */
	public void init(NamedList args) {
		// intentionally empty
	}

	/**
	 * Builds the parser for a single query string.
	 *
	 * @param qstr        the query string
	 * @param localParams local parameters, passed through to the parser
	 * @param params      request parameters, passed through to the parser
	 * @param req         the current request
	 * @return a new {@link ilexirQParser} constructed from the four arguments
	 */
	public QParser createParser(String qstr, SolrParams localParams,
			SolrParams params, SolrQueryRequest req) {
		QParser parser = new ilexirQParser(qstr, localParams, params, req);
		return parser;
	}
}

/**
 * QParser that builds a {@link SolrQueryParser} whose wildcard terms are run
 * through the parser's analyser before the wildcard query is created.
 */
class ilexirQParser extends QParser {
	String sortStr;
	/** Parser created by {@link #parse()}; null until parse() has run. */
	SolrQueryParser lparser;

	public ilexirQParser(String qstr, SolrParams localParams,
			SolrParams params, SolrQueryRequest req) {
		super(qstr, localParams, params, req);
	}

	/**
	 * Parses the query string with a SolrQueryParser subclass that analyses
	 * wildcard terms.
	 *
	 * The default field comes from the "df" parameter, falling back to the
	 * schema's default search field. The default operator comes from the
	 * "q.op" style parameter ({@link QueryParsing#OP}), falling back to the
	 * schema's parser and finally to OR.
	 *
	 * @return the parsed query
	 * @throws ParseException if the underlying parser rejects the query
	 */
	public Query parse() throws ParseException {
		String qstr = getString();

		String defaultField = getParam(CommonParams.DF);
		if (defaultField == null) {
			defaultField = getReq().getSchema().getDefaultSearchFieldName();
		}
		lparser = new SolrQueryParser(this, defaultField) {

			/**
			 * Runs a wildcard term through the parser's analyser and returns
			 * the first token produced. Adapted from lucene's
			 * QueryParser.getFieldQuery.
			 *
			 * If the analyser yields no token at all, exposes no
			 * TermAttribute, or fails with an IOException, the original term
			 * is returned lowercased instead.
			 *
			 * NOTE(review): only the first token is used; an analyser that
			 * splits or strips the wildcard characters will silently change
			 * the query -- confirm the configured analysers keep them.
			 *
			 * @param field   field the wildcard term is searched in
			 * @param termStr raw wildcard term from the query string
			 * @return the analysed term, or the lowercased original term
			 */
			private String myWildcardQuery(String field, String termStr) {
				System.out
						.println("ILEXIR: ORIGINAL WILDCARD QUERY:" + termStr);
				// the analyser this parser was constructed with
				Analyzer analyzer = this.getAnalyzer();
				TokenStream source;
				try {
					source = analyzer.reusableTokenStream(field,
							new StringReader(termStr));
					source.reset();
				} catch (IOException e) {
					// fall back to a fresh, non-reused stream
					source = analyzer.tokenStream(field, new StringReader(
							termStr));
				}
				CachingTokenFilter buffer = new CachingTokenFilter(source);
				try {
					buffer.reset();
					if (buffer.hasAttribute(TermAttribute.class)) {
						TermAttribute termAtt = (TermAttribute) buffer
								.getAttribute(TermAttribute.class);
						// Only trust the attribute when a token was actually
						// produced; otherwise its content is empty or stale.
						if (buffer.incrementToken()) {
							// should be a single analysed term!:
							String analysed = termAtt.term();
							System.out
									.println("ILEXIR: RETURNING ANALYSED WILDCARD QUERY TERM:"
											+ analysed);
							return analysed;
						}
					}
				} catch (IOException e) {
					System.out
							.println("ILEXIR: ilexirQParserPlugin.myWildcardQuery error:"
									+ e.getMessage());
					e.printStackTrace();
				} finally {
					// release the stream (and the reader behind it) on every
					// path, including the early return above
					try {
						buffer.close();
					} catch (IOException ignored) {
						// nothing useful can be done if closing fails
					}
				}
				// analysis gave nothing usable: return the original wildcard
				// term, lowercased independently of the JVM default locale
				termStr = termStr.toLowerCase(Locale.ENGLISH);
				System.out
						.println("ILEXIR: RETURNING LOWERCASED WILDCARD QUERY TERM:"
								+ termStr);
				return termStr;
			}

			/**
			 * Builds the wildcard query after replacing the term with its
			 * analysed form.
			 *
			 * @param field   field being searched
			 * @param termStr raw wildcard term
			 * @return a match-all query for "*:*", otherwise the wildcard
			 *         query for the analysed (and possibly reversed) term
			 * @throws ParseException if the superclass rejects the term
			 */
			@Override
			protected Query getWildcardQuery(String field, String termStr)
					throws ParseException {
				// *:* -> MatchAllDocsQuery
				if ("*".equals(field) && "*".equals(termStr)) {
					return newMatchAllDocsQuery();
				}
				// switch wildcard term to the analysed version!!
				termStr = this.myWildcardQuery(field, termStr);

				// can we use reversed wildcards in this field?
				String type = schema.getFieldType(field).getTypeName();
				ReversedWildcardFilterFactory factory = leadingWildcards
						.get(type);
				if (factory != null && factory.shouldReverse(termStr)) {
					termStr = ReverseStringFilter.reverse(termStr
							+ factory.getMarkerChar());
				}
				// NOTE(review): the superclass may repeat the reversed-wildcard
				// handling on the already reversed term -- confirm against
				// SolrQueryParser.getWildcardQuery.
				Query q = super.getWildcardQuery(field, termStr);
				if (q instanceof WildcardQuery) {
					// use a constant score query to avoid overflowing clauses
					// (re-created from the term only; presumably this relies
					// on WildcardQuery's default rewrite -- TODO confirm)
					WildcardQuery wildcardQuery = new WildcardQuery(
							((WildcardQuery) q).getTerm());
					return wildcardQuery;
				}
				return q;
			}
		};

		// these could either be checked & set here, or in the SolrQueryParser
		// constructor
		String opParam = getParam(QueryParsing.OP);
		if (opParam != null) {
			lparser
					.setDefaultOperator("AND".equals(opParam) ? QueryParser.Operator.AND
							: QueryParser.Operator.OR);
		} else {
			// try to get default operator from schema
			QueryParser.Operator operator = getReq().getSchema()
					.getSolrQueryParser(null).getDefaultOperator();
			lparser
					.setDefaultOperator(null == operator ? QueryParser.Operator.OR
							: operator);
		}

		return lparser.parse(qstr);
	}

	/**
	 * Returns the parser's field as the only default highlight field.
	 * Dereferences {@code lparser}, which is assigned in {@link #parse()}.
	 *
	 * @return a one-element array holding {@code lparser.getField()}
	 */
	public String[] getDefaultHighlightFields() {
		return new String[] { lparser.getField() };
	}

}

Mime
View raw message