Return-Path: Delivered-To: apmail-lucene-solr-commits-archive@locus.apache.org Received: (qmail 97138 invoked from network); 20 Mar 2008 22:40:00 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 20 Mar 2008 22:40:00 -0000 Received: (qmail 34812 invoked by uid 500); 20 Mar 2008 22:39:58 -0000 Delivered-To: apmail-lucene-solr-commits-archive@lucene.apache.org Received: (qmail 34782 invoked by uid 500); 20 Mar 2008 22:39:58 -0000 Mailing-List: contact solr-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: solr-dev@lucene.apache.org Delivered-To: mailing list solr-commits@lucene.apache.org Received: (qmail 34773 invoked by uid 99); 20 Mar 2008 22:39:58 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 20 Mar 2008 15:39:58 -0700 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.3] (HELO eris.apache.org) (140.211.11.3) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 20 Mar 2008 22:39:14 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 322431A983A; Thu, 20 Mar 2008 15:39:32 -0700 (PDT) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r639490 - in /lucene/solr/trunk: ./ src/java/org/apache/solr/core/ src/java/org/apache/solr/highlight/ src/test/org/apache/solr/highlight/ src/test/test-files/solr/conf/ Date: Thu, 20 Mar 2008 22:39:29 -0000 To: solr-commits@lucene.apache.org From: klaas@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20080320223932.322431A983A@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: klaas Date: Thu Mar 20 15:39:27 2008 New Revision: 639490 URL: http://svn.apache.org/viewvc?rev=639490&view=rev Log: SOLR-386: configurable SolrHighlighter Added: lucene/solr/trunk/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java (with props) lucene/solr/trunk/src/test/org/apache/solr/highlight/DummyHighlighter.java (with props) lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterConfigTest.java (with props) lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig-highlight.xml (with props) Modified: lucene/solr/trunk/CHANGES.txt lucene/solr/trunk/src/java/org/apache/solr/core/SolrCore.java lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrHighlighter.java Modified: lucene/solr/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=639490&r1=639489&r2=639490&view=diff ============================================================================== --- lucene/solr/trunk/CHANGES.txt (original) +++ lucene/solr/trunk/CHANGES.txt Thu Mar 20 15:39:27 2008 @@ -557,6 +557,11 @@ 35. SOLR-249: Deprecated SolrException( int, ... ) constructors in favor of constructors that takes an ErrorCode enum. This will ensure that all SolrExceptions use a valid HTTP status code. (ryan) + +36. SOLR-386: Abstracted SolrHighlighter and moved existing implementation + to DefaultSolrHighlighter. Adjusted SolrCore and solrconfig.xml so + that highlighter is configurable via a class attribute. Allows users + to use their own highlighter implementation. (Tricia Williams via klaas) Changes in runtime behavior 1. Highlighting using DisMax will only pick up terms from the main Modified: lucene/solr/trunk/src/java/org/apache/solr/core/SolrCore.java URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/core/SolrCore.java?rev=639490&r1=639489&r2=639490&view=diff ============================================================================== --- lucene/solr/trunk/src/java/org/apache/solr/core/SolrCore.java (original) +++ lucene/solr/trunk/src/java/org/apache/solr/core/SolrCore.java Thu Mar 20 15:39:27 2008 @@ -49,6 +49,7 @@ import org.apache.solr.handler.component.MoreLikeThisComponent; import org.apache.solr.handler.component.QueryComponent; import org.apache.solr.handler.component.SearchComponent; +import org.apache.solr.highlight.DefaultSolrHighlighter; import org.apache.solr.highlight.SolrHighlighter; import org.apache.solr.request.JSONResponseWriter; import org.apache.solr.request.PythonResponseWriter; @@ -305,7 +306,10 @@ private UpdateHandler createUpdateHandler(String className) { return createInstance(className, UpdateHandler.class, "Update Handler"); } - + + private SolrHighlighter createHighlighter(String className) { + return createInstance(className, SolrHighlighter.class, "Highlighter"); + } /** * @return the last core initialized. If you are using multiple cores, @@ -379,8 +383,9 @@ reqHandlers = new RequestHandlers(this); reqHandlers.initHandlersFromConfig( solrConfig ); - // TODO? could select the highlighter implementation - highlighter = new SolrHighlighter(); + highlighter = createHighlighter( + solrConfig.get("highlighting/@class", DefaultSolrHighlighter.class.getName()) + ); highlighter.initalize( solrConfig ); try { Added: lucene/solr/trunk/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java?rev=639490&view=auto ============================================================================== --- lucene/solr/trunk/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java (added) +++ lucene/solr/trunk/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java Thu Mar 20 15:39:27 2008 @@ -0,0 +1,412 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.highlight; + +import java.io.IOException; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.ListIterator; +import java.util.Map; +import java.util.Set; +import java.util.logging.Logger; + +import javax.xml.xpath.XPathConstants; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.Document; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.highlight.Formatter; +import org.apache.lucene.search.highlight.Fragmenter; +import org.apache.lucene.search.highlight.Highlighter; +import org.apache.lucene.search.highlight.QueryScorer; +import org.apache.lucene.search.highlight.TextFragment; +import org.apache.lucene.search.highlight.TokenSources; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.HighlightParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.core.Config; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.DocIterator; +import org.apache.solr.search.DocList; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.util.SolrPluginUtils; +import org.apache.solr.util.plugin.NamedListPluginLoader; +import org.w3c.dom.NodeList; + +/** + * + * @since solr 1.3 + */ +public class DefaultSolrHighlighter extends SolrHighlighter +{ + + public void initalize( final Config config ) + { + formatters.clear(); + fragmenters.clear(); + + // Load the fragmenters + String xpath = "highlighting/fragmenter"; + NamedListPluginLoader fragloader = new NamedListPluginLoader( xpath, fragmenters ); + SolrFragmenter frag = fragloader.load( config.getResourceLoader(), (NodeList)config.evaluate( xpath, XPathConstants.NODESET ) ); + if( frag == null ) { + frag = new GapFragmenter(); + } + fragmenters.put( "", frag ); + fragmenters.put( null, frag ); + + // Load the formatters + xpath = "highlighting/formatter"; + NamedListPluginLoader fmtloader = new NamedListPluginLoader( xpath, formatters ); + SolrFormatter fmt = fmtloader.load( config.getResourceLoader(), (NodeList)config.evaluate( xpath, XPathConstants.NODESET ) ); + if( fmt == null ) { + fmt = new HtmlFormatter(); + } + formatters.put( "", fmt ); + formatters.put( null, fmt ); + } + + + /** + * Return a Highlighter appropriate for this field. + * @param query The current Query + * @param fieldName The name of the field + * @param request The current SolrQueryRequest + */ + protected Highlighter getHighlighter(Query query, String fieldName, SolrQueryRequest request) { + SolrParams params = request.getParams(); + Highlighter highlighter = new Highlighter( + getFormatter(fieldName, params), + getQueryScorer(query, fieldName, request)); + highlighter.setTextFragmenter(getFragmenter(fieldName, params)); + highlighter.setMaxDocBytesToAnalyze(params.getFieldInt( + fieldName, HighlightParams.MAX_CHARS, + Highlighter.DEFAULT_MAX_DOC_BYTES_TO_ANALYZE)); + return highlighter; + } + + /** + * Return a QueryScorer suitable for this Query and field. + * @param query The current query + * @param fieldName The name of the field + * @param request The SolrQueryRequest + */ + protected QueryScorer getQueryScorer(Query query, String fieldName, SolrQueryRequest request) { + boolean reqFieldMatch = request.getParams().getFieldBool(fieldName, HighlightParams.FIELD_MATCH, false); + if (reqFieldMatch) { + return new QueryScorer(query, request.getSearcher().getReader(), fieldName); + } + else { + return new QueryScorer(query); + } + } + + /** + * Return the max number of snippets for this field. If this has not + * been configured for this field, fall back to the configured default + * or the solr default. + * @param fieldName The name of the field + * @param params The params controlling Highlighting + */ + protected int getMaxSnippets(String fieldName, SolrParams params) { + return params.getFieldInt(fieldName, HighlightParams.SNIPPETS,1); + } + + /** + * Return whether adjacent fragments should be merged. + * @param fieldName The name of the field + * @param params The params controlling Highlighting + */ + protected boolean isMergeContiguousFragments(String fieldName, SolrParams params){ + return params.getFieldBool(fieldName, HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, false); + } + + /** + * Return a formatter appropriate for this field. If a formatter + * has not been configured for this field, fall back to the configured + * default or the solr default (SimpleHTMLFormatter). + * + * @param fieldName The name of the field + * @param params The params controlling Highlighting + * @return An appropriate Formatter. + */ + protected Formatter getFormatter(String fieldName, SolrParams params ) + { + String str = params.getFieldParam( fieldName, HighlightParams.FORMATTER ); + SolrFormatter formatter = formatters.get( str ); + if( formatter == null ) { + throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown formatter: "+str ); + } + return formatter.getFormatter( fieldName, params ); + } + + /** + * Return a fragmenter appropriate for this field. If a fragmenter + * has not been configured for this field, fall back to the configured + * default or the solr default (GapFragmenter). + * + * @param fieldName The name of the field + * @param params The params controlling Highlighting + * @return An appropriate Fragmenter. + */ + protected Fragmenter getFragmenter(String fieldName, SolrParams params) + { + String fmt = params.getFieldParam( fieldName, HighlightParams.FRAGMENTER ); + SolrFragmenter frag = fragmenters.get( fmt ); + if( frag == null ) { + throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown fragmenter: "+fmt ); + } + return frag.getFragmenter( fieldName, params ); + } + + /** + * Generates a list of Highlighted query fragments for each item in a list + * of documents, or returns null if highlighting is disabled. + * + * @param docs query results + * @param query the query + * @param req the current request + * @param defaultFields default list of fields to summarize + * + * @return NamedList containing a NamedList for each document, which in + * turns contains sets (field, summary) pairs. + */ + @SuppressWarnings("unchecked") + public NamedList doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException { + SolrParams params = req.getParams(); + if (!isHighlightingEnabled(params)) + return null; + + SolrIndexSearcher searcher = req.getSearcher(); + IndexSchema schema = searcher.getSchema(); + NamedList fragments = new SimpleOrderedMap(); + String[] fieldNames = getHighlightFields(query, req, defaultFields); + Document[] readDocs = new Document[docs.size()]; + { + // pre-fetch documents using the Searcher's doc cache + Set fset = new HashSet(); + for(String f : fieldNames) { fset.add(f); } + // fetch unique key if one exists. + SchemaField keyField = schema.getUniqueKeyField(); + if(null != keyField) + fset.add(keyField.getName()); + searcher.readDocs(readDocs, docs, fset); + } + + + // Highlight each document + DocIterator iterator = docs.iterator(); + for (int i = 0; i < docs.size(); i++) { + int docId = iterator.nextDoc(); + Document doc = readDocs[i]; + NamedList docSummaries = new SimpleOrderedMap(); + for (String fieldName : fieldNames) { + fieldName = fieldName.trim(); + String[] docTexts = doc.getValues(fieldName); + if (docTexts == null) continue; + + // get highlighter, and number of fragments for this field + Highlighter highlighter = getHighlighter(query, fieldName, req); + int numFragments = getMaxSnippets(fieldName, params); + boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params); + + String[] summaries = null; + TextFragment[] frag; + if (docTexts.length == 1) { + // single-valued field + TokenStream tstream; + try { + // attempt term vectors + tstream = TokenSources.getTokenStream(searcher.getReader(), docId, fieldName); + } + catch (IllegalArgumentException e) { + // fall back to analyzer + tstream = new TokenOrderingFilter(schema.getAnalyzer().tokenStream(fieldName, new StringReader(docTexts[0])), 10); + } + frag = highlighter.getBestTextFragments(tstream, docTexts[0], mergeContiguousFragments, numFragments); + } + else { + // multi-valued field + MultiValueTokenStream tstream; + tstream = new MultiValueTokenStream(fieldName, docTexts, schema.getAnalyzer(), true); + frag = highlighter.getBestTextFragments(tstream, tstream.asSingleValue(), false, numFragments); + } + // convert fragments back into text + // TODO: we can include score and position information in output as snippet attributes + if (frag.length > 0) { + ArrayList fragTexts = new ArrayList(); + for (int j = 0; j < frag.length; j++) { + if ((frag[j] != null) && (frag[j].getScore() > 0)) { + fragTexts.add(frag[j].toString()); + } + } + summaries = fragTexts.toArray(new String[0]); + if (summaries.length > 0) + docSummaries.add(fieldName, summaries); + } + // no summeries made, copy text from alternate field + if (summaries == null || summaries.length == 0) { + String alternateField = req.getParams().getFieldParam(fieldName, HighlightParams.ALTERNATE_FIELD); + if (alternateField != null && alternateField.length() > 0) { + String[] altTexts = doc.getValues(alternateField); + if (altTexts != null && altTexts.length > 0) + docSummaries.add(fieldName, altTexts); + } + } + + } + String printId = schema.printableUniqueKey(doc); + fragments.add(printId == null ? null : printId, docSummaries); + } + return fragments; + } +} + + + + +/** + * Helper class which creates a single TokenStream out of values from a + * multi-valued field. + */ +class MultiValueTokenStream extends TokenStream { + private String fieldName; + private String[] values; + private Analyzer analyzer; + private int curIndex; // next index into the values array + private int curOffset; // offset into concatenated string + private TokenStream currentStream; // tokenStream currently being iterated + private boolean orderTokenOffsets; + + /** Constructs a TokenStream for consecutively-analyzed field values + * + * @param fieldName name of the field + * @param values array of field data + * @param analyzer analyzer instance + */ + public MultiValueTokenStream(String fieldName, String[] values, + Analyzer analyzer, boolean orderTokenOffsets) { + this.fieldName = fieldName; + this.values = values; + this.analyzer = analyzer; + curIndex = -1; + curOffset = 0; + currentStream = null; + this.orderTokenOffsets=orderTokenOffsets; + } + + /** Returns the next token in the stream, or null at EOS. */ + @Override + public Token next() throws IOException { + int extra = 0; + if(currentStream == null) { + curIndex++; + if(curIndex < values.length) { + currentStream = analyzer.tokenStream(fieldName, + new StringReader(values[curIndex])); + if (orderTokenOffsets) currentStream = new TokenOrderingFilter(currentStream,10); + // add extra space between multiple values + if(curIndex > 0) + extra = analyzer.getPositionIncrementGap(fieldName); + } else { + return null; + } + } + Token nextToken = currentStream.next(); + if(nextToken == null) { + curOffset += values[curIndex].length(); + currentStream = null; + return next(); + } + // create an modified token which is the offset into the concatenated + // string of all values + Token offsetToken = new Token(nextToken.termText(), + nextToken.startOffset() + curOffset, + nextToken.endOffset() + curOffset); + offsetToken.setPositionIncrement(nextToken.getPositionIncrement() + extra*10); + return offsetToken; + } + + /** + * Returns all values as a single String into which the Tokens index with + * their offsets. + */ + public String asSingleValue() { + StringBuilder sb = new StringBuilder(); + for(String str : values) + sb.append(str); + return sb.toString(); + } + +} + + +/** Orders Tokens in a window first by their startOffset ascending. + * endOffset is currently ignored. + * This is meant to work around fickleness in the highlighter only. It + * can mess up token positions and should not be used for indexing or querying. + */ +class TokenOrderingFilter extends TokenFilter { + private final int windowSize; + private final LinkedList queue = new LinkedList(); + private boolean done=false; + + protected TokenOrderingFilter(TokenStream input, int windowSize) { + super(input); + this.windowSize = windowSize; + } + + @Override + public Token next() throws IOException { + while (!done && queue.size() < windowSize) { + Token newTok = input.next(); + if (newTok==null) { + done=true; + break; + } + + // reverse iterating for better efficiency since we know the + // list is already sorted, and most token start offsets will be too. + ListIterator iter = queue.listIterator(queue.size()); + while(iter.hasPrevious()) { + if (newTok.startOffset() >= iter.previous().startOffset()) { + // insertion will be before what next() would return (what + // we just compared against), so move back one so the insertion + // will be after. + iter.next(); + break; + } + } + iter.add(newTok); + } + + return queue.isEmpty() ? null : queue.removeFirst(); + } +} + + Propchange: lucene/solr/trunk/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java ------------------------------------------------------------------------------ svn:eol-style = native Modified: lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrHighlighter.java URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrHighlighter.java?rev=639490&r1=639489&r2=639490&view=diff ============================================================================== --- lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrHighlighter.java (original) +++ lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrHighlighter.java Thu Mar 20 15:39:27 2008 @@ -1,464 +1,95 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ package org.apache.solr.highlight; import java.io.IOException; -import java.io.StringReader; -import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.ListIterator; import java.util.Map; -import java.util.Set; import java.util.logging.Logger; -import javax.xml.xpath.XPathConstants; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.document.Document; import org.apache.lucene.search.Query; import org.apache.lucene.search.highlight.Formatter; import org.apache.lucene.search.highlight.Fragmenter; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; -import org.apache.lucene.search.highlight.TextFragment; -import org.apache.lucene.search.highlight.TokenSources; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.HighlightParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; -import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.core.Config; import org.apache.solr.request.SolrQueryRequest; -import org.apache.solr.schema.IndexSchema; -import org.apache.solr.schema.SchemaField; -import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocList; -import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.util.SolrPluginUtils; -import org.apache.solr.util.plugin.NamedListPluginLoader; -import org.w3c.dom.NodeList; -/** - * - * @since solr 1.3 - */ -public class SolrHighlighter +public abstract class SolrHighlighter { - public static Logger log = Logger.getLogger(SolrHighlighter.class.getName()); - - // Thread safe registry - protected final Map formatters = - Collections.synchronizedMap( new HashMap() ); - - // Thread safe registry - protected final Map fragmenters = - Collections.synchronizedMap( new HashMap() ); - - public void initalize( final Config config ) - { - formatters.clear(); - fragmenters.clear(); - - // Load the fragmenters - String xpath = "highlighting/fragmenter"; - NamedListPluginLoader fragloader = new NamedListPluginLoader( xpath, fragmenters ); - SolrFragmenter frag = fragloader.load( config.getResourceLoader(), (NodeList)config.evaluate( xpath, XPathConstants.NODESET ) ); - if( frag == null ) { - frag = new GapFragmenter(); - } - fragmenters.put( "", frag ); - fragmenters.put( null, frag ); - - // Load the formatters - xpath = "highlighting/formatter"; - NamedListPluginLoader fmtloader = new NamedListPluginLoader( xpath, formatters ); - SolrFormatter fmt = fmtloader.load( config.getResourceLoader(), (NodeList)config.evaluate( xpath, XPathConstants.NODESET ) ); - if( fmt == null ) { - fmt = new HtmlFormatter(); - } - formatters.put( "", fmt ); - formatters.put( null, fmt ); - } - - - /** - * Check whether Highlighting is enabled for this request. - * @param params The params controlling Highlighting - * @return true if highlighting enabled, false if not. - */ - public boolean isHighlightingEnabled(SolrParams params) { - return params.getBool(HighlightParams.HIGHLIGHT, false); - } - - /** - * Return a Highlighter appropriate for this field. - * @param query The current Query - * @param fieldName The name of the field - * @param request The current SolrQueryRequest - */ - protected Highlighter getHighlighter(Query query, String fieldName, SolrQueryRequest request) { - SolrParams params = request.getParams(); - Highlighter highlighter = new Highlighter( - getFormatter(fieldName, params), - getQueryScorer(query, fieldName, request)); - highlighter.setTextFragmenter(getFragmenter(fieldName, params)); - highlighter.setMaxDocBytesToAnalyze(params.getFieldInt( - fieldName, HighlightParams.MAX_CHARS, - Highlighter.DEFAULT_MAX_DOC_BYTES_TO_ANALYZE)); - return highlighter; - } - - /** - * Return a QueryScorer suitable for this Query and field. - * @param query The current query - * @param fieldName The name of the field - * @param request The SolrQueryRequest - */ - protected QueryScorer getQueryScorer(Query query, String fieldName, SolrQueryRequest request) { - boolean reqFieldMatch = request.getParams().getFieldBool(fieldName, HighlightParams.FIELD_MATCH, false); - if (reqFieldMatch) { - return new QueryScorer(query, request.getSearcher().getReader(), fieldName); - } - else { - return new QueryScorer(query); - } - } - - /** - * Return a String array of the fields to be highlighted. - * Falls back to the programatic defaults, or the default search field if the list of fields - * is not specified in either the handler configuration or the request. - * @param query The current Query - * @param request The current SolrQueryRequest - * @param defaultFields Programmatic default highlight fields, used if nothing is specified in the handler config or the request. - */ - public String[] getHighlightFields(Query query, SolrQueryRequest request, String[] defaultFields) { - String fields[] = request.getParams().getParams(HighlightParams.FIELDS); - - // if no fields specified in the request, or the handler, fall back to programmatic default, or default search field. - if(emptyArray(fields)) { - // use default search field if highlight fieldlist not specified. - if (emptyArray(defaultFields)) { - String defaultSearchField = request.getSchema().getDefaultSearchFieldName(); - fields = null == defaultSearchField ? new String[]{} : new String[]{defaultSearchField}; - } - else { - fields = defaultFields; - } - } - else if (fields.length == 1) { - // if there's a single request/handler value, it may be a space/comma separated list - fields = SolrPluginUtils.split(fields[0]); - } - - return fields; - } - - protected boolean emptyArray(String[] arr) { - return (arr == null || arr.length == 0 || arr[0] == null || arr[0].trim().length() == 0); - } - - /** - * Return the max number of snippets for this field. If this has not - * been configured for this field, fall back to the configured default - * or the solr default. - * @param fieldName The name of the field - * @param params The params controlling Highlighting - */ - protected int getMaxSnippets(String fieldName, SolrParams params) { - return params.getFieldInt(fieldName, HighlightParams.SNIPPETS,1); - } - - /** - * Return whether adjacent fragments should be merged. - * @param fieldName The name of the field - * @param params The params controlling Highlighting - */ - protected boolean isMergeContiguousFragments(String fieldName, SolrParams params){ - return params.getFieldBool(fieldName, HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, false); - } - - /** - * Return a formatter appropriate for this field. If a formatter - * has not been configured for this field, fall back to the configured - * default or the solr default (SimpleHTMLFormatter). - * - * @param fieldName The name of the field - * @param params The params controlling Highlighting - * @return An appropriate Formatter. - */ - protected Formatter getFormatter(String fieldName, SolrParams params ) - { - String str = params.getFieldParam( fieldName, HighlightParams.FORMATTER ); - SolrFormatter formatter = formatters.get( str ); - if( formatter == null ) { - throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown formatter: "+str ); - } - return formatter.getFormatter( fieldName, params ); - } - - /** - * Return a fragmenter appropriate for this field. If a fragmenter - * has not been configured for this field, fall back to the configured - * default or the solr default (GapFragmenter). - * - * @param fieldName The name of the field - * @param params The params controlling Highlighting - * @return An appropriate Fragmenter. - */ - protected Fragmenter getFragmenter(String fieldName, SolrParams params) - { - String fmt = params.getFieldParam( fieldName, HighlightParams.FRAGMENTER ); - SolrFragmenter frag = fragmenters.get( fmt ); - if( frag == null ) { - throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown fragmenter: "+fmt ); - } - return frag.getFragmenter( fieldName, params ); - } - - /** - * Generates a list of Highlighted query fragments for each item in a list - * of documents, or returns null if highlighting is disabled. - * - * @param docs query results - * @param query the query - * @param req the current request - * @param defaultFields default list of fields to summarize - * - * @return NamedList containing a NamedList for each document, which in - * turns contains sets (field, summary) pairs. - */ - @SuppressWarnings("unchecked") - public NamedList doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException { - SolrParams params = req.getParams(); - if (!isHighlightingEnabled(params)) - return null; - - SolrIndexSearcher searcher = req.getSearcher(); - IndexSchema schema = searcher.getSchema(); - NamedList fragments = new SimpleOrderedMap(); - String[] fieldNames = getHighlightFields(query, req, defaultFields); - Document[] readDocs = new Document[docs.size()]; - { - // pre-fetch documents using the Searcher's doc cache - Set fset = new HashSet(); - for(String f : fieldNames) { fset.add(f); } - // fetch unique key if one exists. - SchemaField keyField = schema.getUniqueKeyField(); - if(null != keyField) - fset.add(keyField.getName()); - searcher.readDocs(readDocs, docs, fset); - } - - - // Highlight each document - DocIterator iterator = docs.iterator(); - for (int i = 0; i < docs.size(); i++) { - int docId = iterator.nextDoc(); - Document doc = readDocs[i]; - NamedList docSummaries = new SimpleOrderedMap(); - for (String fieldName : fieldNames) { - fieldName = fieldName.trim(); - String[] docTexts = doc.getValues(fieldName); - if (docTexts == null) continue; - - // get highlighter, and number of fragments for this field - Highlighter highlighter = getHighlighter(query, fieldName, req); - int numFragments = getMaxSnippets(fieldName, params); - boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params); - - String[] summaries = null; - TextFragment[] frag; - if (docTexts.length == 1) { - // single-valued field - TokenStream tstream; - try { - // attempt term vectors - tstream = TokenSources.getTokenStream(searcher.getReader(), docId, fieldName); - } - catch (IllegalArgumentException e) { - // fall back to analyzer - tstream = new TokenOrderingFilter(schema.getAnalyzer().tokenStream(fieldName, new StringReader(docTexts[0])), 10); - } - frag = highlighter.getBestTextFragments(tstream, docTexts[0], mergeContiguousFragments, numFragments); - } - else { - // multi-valued field - MultiValueTokenStream tstream; - tstream = new MultiValueTokenStream(fieldName, docTexts, schema.getAnalyzer(), true); - frag = highlighter.getBestTextFragments(tstream, tstream.asSingleValue(), false, numFragments); - } - // convert fragments back into text - // TODO: we can include score and position information in output as snippet attributes - if (frag.length > 0) { - ArrayList fragTexts = new ArrayList(); - for (int j = 0; j < frag.length; j++) { - if ((frag[j] != null) && (frag[j].getScore() > 0)) { - fragTexts.add(frag[j].toString()); - } - } - summaries = fragTexts.toArray(new String[0]); - if (summaries.length > 0) - docSummaries.add(fieldName, summaries); - } - // no summeries made, copy text from alternate field - if (summaries == null || summaries.length == 0) { - String alternateField = req.getParams().getFieldParam(fieldName, HighlightParams.ALTERNATE_FIELD); - if (alternateField != null && alternateField.length() > 0) { - String[] altTexts = doc.getValues(alternateField); - if (altTexts != null && altTexts.length > 0) - docSummaries.add(fieldName, altTexts); - } - } - - } - String printId = schema.printableUniqueKey(doc); - fragments.add(printId == null ? null : printId, docSummaries); - } - return fragments; - } -} - - - - -/** - * Helper class which creates a single TokenStream out of values from a - * multi-valued field. - */ -class MultiValueTokenStream extends TokenStream { - private String fieldName; - private String[] values; - private Analyzer analyzer; - private int curIndex; // next index into the values array - private int curOffset; // offset into concatenated string - private TokenStream currentStream; // tokenStream currently being iterated - private boolean orderTokenOffsets; - - /** Constructs a TokenStream for consecutively-analyzed field values - * - * @param fieldName name of the field - * @param values array of field data - * @param analyzer analyzer instance - */ - public MultiValueTokenStream(String fieldName, String[] values, - Analyzer analyzer, boolean orderTokenOffsets) { - this.fieldName = fieldName; - this.values = values; - this.analyzer = analyzer; - curIndex = -1; - curOffset = 0; - currentStream = null; - this.orderTokenOffsets=orderTokenOffsets; - } - - /** Returns the next token in the stream, or null at EOS. */ - @Override - public Token next() throws IOException { - int extra = 0; - if(currentStream == null) { - curIndex++; - if(curIndex < values.length) { - currentStream = analyzer.tokenStream(fieldName, - new StringReader(values[curIndex])); - if (orderTokenOffsets) currentStream = new TokenOrderingFilter(currentStream,10); - // add extra space between multiple values - if(curIndex > 0) - extra = analyzer.getPositionIncrementGap(fieldName); - } else { - return null; - } - } - Token nextToken = currentStream.next(); - if(nextToken == null) { - curOffset += values[curIndex].length(); - currentStream = null; - return next(); - } - // create an modified token which is the offset into the concatenated - // string of all values - Token offsetToken = new Token(nextToken.termText(), - nextToken.startOffset() + curOffset, - nextToken.endOffset() + curOffset); - offsetToken.setPositionIncrement(nextToken.getPositionIncrement() + extra*10); - return offsetToken; - } - - /** - * Returns all values as a single String into which the Tokens index with - * their offsets. - */ - public String asSingleValue() { - StringBuilder sb = new StringBuilder(); - for(String str : values) - sb.append(str); - return sb.toString(); - } + public static Logger log = Logger.getLogger(SolrHighlighter.class.getName()); + // Thread safe registry + protected final Map formatters = + Collections.synchronizedMap( new HashMap() ); + + // Thread safe registry + protected final Map fragmenters = + Collections.synchronizedMap( new HashMap() ); + + public abstract void initalize( final Config config ); + + + /** + * Check whether Highlighting is enabled for this request. + * @param params The params controlling Highlighting + * @return true if highlighting enabled, false if not. + */ + public boolean isHighlightingEnabled(SolrParams params) { + return params.getBool(HighlightParams.HIGHLIGHT, false); + } + + /** + * Return a String array of the fields to be highlighted. + * Falls back to the programatic defaults, or the default search field if the list of fields + * is not specified in either the handler configuration or the request. + * @param query The current Query + * @param request The current SolrQueryRequest + * @param defaultFields Programmatic default highlight fields, used if nothing is specified in the handler config or the request. + */ + public String[] getHighlightFields(Query query, SolrQueryRequest request, String[] defaultFields) { + String fields[] = request.getParams().getParams(HighlightParams.FIELDS); + + // if no fields specified in the request, or the handler, fall back to programmatic default, or default search field. + if(emptyArray(fields)) { + // use default search field if highlight fieldlist not specified. + if (emptyArray(defaultFields)) { + String defaultSearchField = request.getSchema().getSolrQueryParser(null).getField(); + fields = null == defaultSearchField ? new String[]{} : new String[]{defaultSearchField}; + } + else { + fields = defaultFields; + } + } + else if (fields.length == 1) { + // if there's a single request/handler value, it may be a space/comma separated list + fields = SolrPluginUtils.split(fields[0]); + } + + return fields; + } + + protected boolean emptyArray(String[] arr) { + return (arr == null || arr.length == 0 || arr[0] == null || arr[0].trim().length() == 0); + } + + /** + * Generates a list of Highlighted query fragments for each item in a list + * of documents, or returns null if highlighting is disabled. + * + * @param docs query results + * @param query the query + * @param req the current request + * @param defaultFields default list of fields to summarize + * + * @return NamedList containing a NamedList for each document, which in + * turns contains sets (field, summary) pairs. + */ + @SuppressWarnings("unchecked") + public abstract NamedList doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException; } - - -/** Orders Tokens in a window first by their startOffset ascending. - * endOffset is currently ignored. - * This is meant to work around fickleness in the highlighter only. It - * can mess up token positions and should not be used for indexing or querying. - */ -class TokenOrderingFilter extends TokenFilter { - private final int windowSize; - private final LinkedList queue = new LinkedList(); - private boolean done=false; - - protected TokenOrderingFilter(TokenStream input, int windowSize) { - super(input); - this.windowSize = windowSize; - } - - @Override - public Token next() throws IOException { - while (!done && queue.size() < windowSize) { - Token newTok = input.next(); - if (newTok==null) { - done=true; - break; - } - - // reverse iterating for better efficiency since we know the - // list is already sorted, and most token start offsets will be too. - ListIterator iter = queue.listIterator(queue.size()); - while(iter.hasPrevious()) { - if (newTok.startOffset() >= iter.previous().startOffset()) { - // insertion will be before what next() would return (what - // we just compared against), so move back one so the insertion - // will be after. - iter.next(); - break; - } - } - iter.add(newTok); - } - - return queue.isEmpty() ? null : queue.removeFirst(); - } -} - - Added: lucene/solr/trunk/src/test/org/apache/solr/highlight/DummyHighlighter.java URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/highlight/DummyHighlighter.java?rev=639490&view=auto ============================================================================== --- lucene/solr/trunk/src/test/org/apache/solr/highlight/DummyHighlighter.java (added) +++ lucene/solr/trunk/src/test/org/apache/solr/highlight/DummyHighlighter.java Thu Mar 20 15:39:27 2008 @@ -0,0 +1,27 @@ +package org.apache.solr.highlight; + +import java.io.IOException; + +import org.apache.lucene.search.Query; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.core.Config; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.DocList; + +public class DummyHighlighter extends SolrHighlighter { + + @Override + public NamedList doHighlighting(DocList docs, Query query, + SolrQueryRequest req, String[] defaultFields) throws IOException { + NamedList fragments = new SimpleOrderedMap(); + fragments.add("dummy", "thing1"); + return fragments; + } + + @Override + public void initalize(Config config) { + // do nothing + } + +} Propchange: lucene/solr/trunk/src/test/org/apache/solr/highlight/DummyHighlighter.java ------------------------------------------------------------------------------ svn:eol-style = native Added: lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterConfigTest.java URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterConfigTest.java?rev=639490&view=auto ============================================================================== --- lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterConfigTest.java (added) +++ lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterConfigTest.java Thu Mar 20 15:39:27 2008 @@ -0,0 +1,61 @@ +package org.apache.solr.highlight; + +import java.io.IOException; +import java.util.HashMap; + +import org.apache.lucene.search.Query; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.core.Config; +import org.apache.solr.core.SolrCore; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.DocList; +import org.apache.solr.util.AbstractSolrTestCase; +import org.apache.solr.util.TestHarness; + + +public class HighlighterConfigTest extends AbstractSolrTestCase { + @Override public String getSchemaFile() { return "schema.xml"; } + // the default case (i.e. without a class attribute) is tested every time sorlconfig.xml is used + @Override public String getSolrConfigFile() { return "solrconfig-highlight.xml"; } + + @Override + public void setUp() throws Exception { + // if you override setUp or tearDown, you better call + // the super classes version + super.setUp(); + } + + @Override + public void tearDown() throws Exception { + // if you override setUp or tearDown, you better call + // the super classes version + super.tearDown(); + } + + public void testConfig() + { + SolrHighlighter highlighter = SolrCore.getSolrCore().getHighlighter(); + System.out.println( "highlighter" ); + + assertTrue( highlighter instanceof DummyHighlighter ); + + // check to see that doHighlight is called from the DummyHighlighter + HashMap args = new HashMap(); + args.put("hl", "true"); + args.put("df", "t_text"); + args.put("hl.fl", ""); + TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory( + "standard", 0, 200, args); + + assertU(adoc("t_text", "a long day's night", "id", "1")); + assertU(commit()); + assertU(optimize()); + assertQ("Basic summarization", + sumLRF.makeRequest("long"), + "//lst[@name='highlighting']/str[@name='dummy']" + ); + } +} + + Propchange: lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterConfigTest.java ------------------------------------------------------------------------------ svn:eol-style = native Added: lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig-highlight.xml URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig-highlight.xml?rev=639490&view=auto ============================================================================== --- lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig-highlight.xml (added) +++ lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig-highlight.xml Thu Mar 20 15:39:27 2008 @@ -0,0 +1,366 @@ + + + + + + + + + + ${solr.data.dir:./solr/data} + + + + + false + 10 + + + + 32 + 2147483647 + 10000 + 1000 + 10000 + + + false + + + org.apache.lucene.index.LogByteSizeMergePolicy + + + org.apache.lucene.index.ConcurrentMergeScheduler + + 1000 + 10000 + + single + + + + + false + 10 + 32 + 2147483647 + 10000 + + true + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + true + + + + + true + + 10 + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.01 + + text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0 + + + text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5 + + + ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3 + + + 3<-1 5<-2 6<90% + + 100 + + + + *:* + 0.01 + + text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0 + + + text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5 + + + ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3 + + + 3<-1 5<-2 6<90% + + 100 + + + + 1000 + 1.4142135 + 12 + foo + + + sqrt 2 + log 10 + + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + + + string + elevate.xml + + + + + explicit + + + elevate + + + + + + + + + 100 + + + + + + 70 + + + + + + + ]]> + ]]> + + + + + + + + + + max-age=30, public + + + + + solr + solrconfig.xml scheam.xml admin-extra.html + + + + prefix-${solr.test.sys.prop2}-suffix + + + + + + Propchange: lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig-highlight.xml ------------------------------------------------------------------------------ svn:eol-style = native