lucene-solr-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Faisal Mansoor <faisal.mans...@gmail.com>
Subject Using stored value of a field to build suggester index
Date Sun, 23 Nov 2014 20:25:14 GMT
Hi,

I am trying to build a suggester for a field which is both indexed and
stored. The field is whitespace-tokenized, lowercased, stemmed, etc. during
indexing.

It looks like the indexed terms are used as the source for building the
suggester index, which is also what the following line in the suggester
documentation says.

https://wiki.apache.org/solr/Suggester


   - field - if sourceLocation is empty then terms from this field in the
     index will be used when building the trie.

I want to display the suggested value in the UI. Is it possible to use the
stored value of the field, rather than the indexed terms, to build the
suggester index?

Here are the relevant definitions from solrconfig.xml and schema.xml.

Thanks.
Faisal

solrconfig.xml

  <!-- Spellcheck-based suggester: a SpellCheckComponent that hosts a single
       spellchecker entry named "infix_suggest_analyzing". -->
  <searchComponent name="infix_suggest_analyzing"
                   class="solr.SpellCheckComponent">
    <lst name="spellchecker">
      <!-- Name of this dictionary; the /suggest handler refers to it via
           spellcheck.dictionary. -->
      <str name="name">infix_suggest_analyzing</str>
      <str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
      <!-- Lookup implementation: analyzing infix lookup. -->
      <str name="lookupImpl">org.apache.solr.spelling.suggest.fst.AnalyzingInfixLookupFactory</str>
      <!-- Rebuild on commit is switched off. -->
      <str name="buildOnCommit">false</str>

      <!-- Suggester properties -->
      <!-- Field type whose analyzer is applied to suggestions. -->
      <str name="suggestAnalyzerFieldType">autosuggest_fieldType</str>
      <!-- NOTE(review): dictionaryImpl looks like a SuggestComponent/SolrSuggester
           setting; confirm the spellcheck-based Suggester class above reads it. -->
      <str name="dictionaryImpl">org.apache.solr.spelling.suggest.HighFrequencyDictionaryFactory</str>
      <!-- Source field for the suggester. -->
      <str name="field">DisplayName</str>
    </lst>

    <!-- Query analysis uses a field type built from
         keywordtokenizer + lowercase + cleanup. -->
    <str name="queryAnalyzerFieldType">phrase_suggest</str>
  </searchComponent>


  <!-- /suggest endpoint: a SearchHandler whose component list contains only
       the infix_suggest_analyzing component. -->
  <requestHandler name="/suggest"
                  class="org.apache.solr.handler.component.SearchHandler">
    <!-- Default request parameters applied when the client does not send them. -->
    <lst name="defaults">
      <str name="echoParams">explicit</str>
      <!-- Spellcheck processing is enabled by default and bound to the
           infix_suggest_analyzing dictionary. -->
      <str name="spellcheck">true</str>
      <str name="spellcheck.dictionary">infix_suggest_analyzing</str>
      <str name="spellcheck.onlyMorePopular">true</str>
      <!-- Up to 200 suggestions, with collation enabled (max 10 collations). -->
      <str name="spellcheck.count">200</str>
      <str name="spellcheck.collate">true</str>
      <str name="spellcheck.maxCollations">10</str>
    </lst>

    <arr name="components">
      <str>infix_suggest_analyzing</str>
    </arr>
  </requestHandler>


schema.xml


  <!-- Field type referenced by the suggester's suggestAnalyzerFieldType.
       One analyzer (no index/query split): standard tokenizer, then
       lowercasing, then ASCII folding. -->
  <fieldType name="autosuggest_fieldType"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.ASCIIFoldingFilterFactory"/>
    </analyzer>
  </fieldType>

  <!-- Field type referenced by the suggester's queryAnalyzerFieldType.
       The element name is spelled "fieldType" to match the other type
       definitions in this schema.
       Chain: keyword tokenizer, pattern-based cleanup, lowercasing, trim. -->
  <fieldType name="phrase_suggest" class="solr.TextField">
    <analyzer>
      <tokenizer class="solr.KeywordTokenizerFactory"/>
      <!-- Replaces every match with a single space. The pattern matches either
           a run of letter/mark/number/surrogate/underscore characters ending in
           ":" (optionally preceded by other characters), or any run of
           characters outside the letter/mark/number/surrogate classes. -->
      <filter class="solr.PatternReplaceFilterFactory"
              pattern="([^\p{L}\p{M}\p{N}\p{Cs}]*[\p{L}\p{M}\p{N}\p{Cs}\_]+:)|([^\p{L}\p{M}\p{N}\p{Cs}])+"
              replacement=" "
              replace="all"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.TrimFilterFactory"/>
    </analyzer>
  </fieldType>


    <!-- General text type with separate index-time and query-time analyzers.
         Both chains use a whitespace tokenizer followed by stop-word removal,
         word-delimiter splitting, lowercasing, protected-word marking, Porter
         stemming and duplicate removal. Only the query chain adds synonym
         expansion, and the catenate options differ between the two. -->
    <fieldType name="text"
               class="solr.TextField"
               positionIncrementGap="100">

      <!-- Index-time chain: word and number parts are also catenated. -->
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords.txt"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <!-- Terms listed in protwords.txt are marked as keywords ahead of the stemmer. -->
        <filter class="solr.KeywordMarkerFilterFactory"
                protected="protwords.txt"/>
        <filter class="solr.PorterStemFilterFactory"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>

      <!-- Query-time chain: synonyms are expanded first; no catenation. -->
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory"
                synonyms="synonyms.txt"
                ignoreCase="true"
                expand="true"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords.txt"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="0"
                catenateNumbers="0"
                catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <!-- Terms listed in protwords.txt are marked as keywords ahead of the stemmer. -->
        <filter class="solr.KeywordMarkerFilterFactory"
                protected="protwords.txt"/>
        <filter class="solr.PorterStemFilterFactory"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
    </fieldType>


<!-- The field named by the suggester's "field" setting: a required,
     single-valued field of type "text" that is both indexed and stored. -->
<field name="DisplayName"
       type="text"
       indexed="true"
       stored="true"
       required="true"
       multiValued="false"/>

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message