lucene-solr-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Markus Jelsma <markus.jel...@openindex.io>
Subject RE: configuring per-field similarity in Solr 4: "the global similarity does not support it"
Date Mon, 17 Dec 2012 22:17:54 GMT
Hi Tom,

The global similarity must be able to delegate similarity to your per-field setting. Solr
has the SchemaSimilarityFactory that can do this. Please replace your global similarity with:

<similarity class="solr.SchemaSimilarityFactory"/>

Keep in mind that coord and queryNorm (=1.0f) are currently not implemented by this factory, so your
TF-IDF scores will differ from the ones you got before!

Cheers,

 
 
-----Original message-----
> From:Tom Burton-West <tburtonw@umich.edu>
> Sent: Mon 17-Dec-2012 23:11
> To: solr-user@lucene.apache.org
> Subject: configuring per-field similarity in Solr 4: "the global similarity
> does not support it"
> 
> Hello,
> 
> I have Solr 4 configured with several fields using different similarity
> classes according to:
> http://wiki.apache.org/solr/SchemaXml#Similarity
> 
> However, I get this error message:
> " FieldType 'DFR' is configured with a similarity, but the global
> similarity does not support it: class
> org.apache.solr.search.similarities.DefaultSimilarityFactory"
> 
> Excerpt from schema.xml below.
> 
> What I am trying to do is have any field that doesn't specify a similarity
> use the default, while setting up three specific fields to use the DFR, IB, and
> BM25 similarities respectively.
> 
> I think I'm missing something here.  Can someone point me to documentation
> or examples?
> 
> Tom
> 
> ----------------
> Simplified schema.xml excerpt:
>  <fieldType name="CJKFullText" class="solr.TextField"
> positionIncrementGap="100"  autoGeneratePhraseQueries="false">
>       <analyzer type="index">
>         <tokenizer class="solr.ICUTokenizerFactory"/>
>         <filter class="solr.ICUFoldingFilterFactory"/>
>       </analyzer>
>       <analyzer type="query">
>         <tokenizer class="solr.ICUTokenizerFactory"/>
>         <filter class="solr.ICUFoldingFilterFactory"/>
>       </analyzer>
>     </fieldType>
> 
> <!--###########################################################-->
> <!--  relevance rank testing -->
> 
> 
>  <fieldType name="DFR" class="solr.TextField" positionIncrementGap="100"
>  autoGeneratePhraseQueries="false">
>       <analyzer type="index">
>         <tokenizer class="solr.ICUTokenizerFactory"/>
>         <filter class="solr.ICUFoldingFilterFactory"/>
>       </analyzer>
>       <analyzer type="query">
>         <tokenizer class="solr.ICUTokenizerFactory"/>
> 
>         <filter class="solr.ICUFoldingFilterFactory"/>
>       </analyzer>
> 
> <similarity class="solr.DFRSimilarityFactory">
>       <str name="basicModel">I(F)</str>
>       <str name="afterEffect">B</str>
>       <str name="normalization">H2</str>
>     </similarity>
> 
> 
>     </fieldType>
> 
> 
>  <fieldType name="IB" class="solr.TextField" positionIncrementGap="100"
>  autoGeneratePhraseQueries="false">
>       <analyzer type="index">
>         <tokenizer class="solr.ICUTokenizerFactory"/>
>         <filter class="solr.ICUFoldingFilterFactory"/>
>       </analyzer>
>       <analyzer type="query">
>         <tokenizer class="solr.ICUTokenizerFactory"/>
>         <filter class="solr.ICUFoldingFilterFactory"/>
>       </analyzer>
> 
>  <similarity class="solr.IBSimilarityFactory">
>       <str name="distribution">SPL</str>
>       <str name="lambda">DF</str>
>       <str name="normalization">H2</str>
>     </similarity>
>     </fieldType>
> 
> 
>  <fieldType name="BM25" class="solr.TextField" positionIncrementGap="100"
>  autoGeneratePhraseQueries="false">
>       <analyzer type="index">
>         <tokenizer class="solr.ICUTokenizerFactory"/>
>         <filter class="solr.ICUFoldingFilterFactory"/>
>       </analyzer>
>       <analyzer type="query">
>         <tokenizer class="solr.ICUTokenizerFactory"/>
>         <filter class="solr.ICUFoldingFilterFactory"/>
>       </analyzer>
> 
>  <similarity class="solr.BM25SimilarityFactory">
> <!-- start with the defaults  -->
>       <float name="k1">1.2</float>
>       <float name="b">0.75</float>
>     </similarity>
> 
>     </fieldType>
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> =======================================================================================-
> Excerpt from actual schema.xml
>  <fieldType name="CJKFullText" class="solr.TextField"
> positionIncrementGap="100"  autoGeneratePhraseQueries="false">
>       <analyzer type="index">
>         <tokenizer class="solr.ICUTokenizerFactory"/>
>         <filter class="solr.ICUFoldingFilterFactory"/>
>         <filter class="solr.CJKBigramFilterFactory"
>          han="true" hiragana="true"
>         katakana="false" hangul="false"   />
> 
> 
>         <filter class="solr.CommonGramsFilterFactory"
> words="1000common.txt" />
>       </analyzer>
>       <analyzer type="query">
>         <tokenizer class="solr.ICUTokenizerFactory"/>
> 
>         <filter class="solr.ICUFoldingFilterFactory"/>
>         <filter class="solr.CJKBigramFilterFactory"
>            han="true" hiragana="true"
>           katakana="false" hangul="false"   />
> 
>         <filter class="solr.CommonGramsQueryFilterFactory"
> words="1000common.txt" />
>       </analyzer>
>     </fieldType>
> 
> <!--###########################################################-->
> <!--  relevance rank testing -->
> 
> 
>  <fieldType name="DFR" class="solr.TextField" positionIncrementGap="100"
>  autoGeneratePhraseQueries="false">
>       <analyzer type="index">
>         <tokenizer class="solr.ICUTokenizerFactory"/>
>         <filter class="solr.ICUFoldingFilterFactory"/>
>         <filter class="solr.CJKBigramFilterFactory"
>          han="true" hiragana="true"
>         katakana="false" hangul="false"   />
> 
> 
>         <filter class="solr.CommonGramsFilterFactory"
> words="1000common.txt" />
>       </analyzer>
>       <analyzer type="query">
>         <tokenizer class="solr.ICUTokenizerFactory"/>
> 
>         <filter class="solr.ICUFoldingFilterFactory"/>
>         <filter class="solr.CJKBigramFilterFactory"
>            han="true" hiragana="true"
>           katakana="false" hangul="false"   />
> 
>         <filter class="solr.CommonGramsQueryFilterFactory"
> words="1000common.txt" />
>       </analyzer>
> 
> <similarity class="solr.DFRSimilarityFactory">
>       <str name="basicModel">I(F)</str>
>       <str name="afterEffect">B</str>
>       <str name="normalization">H2</str>
>     </similarity>
> 
> 
>     </fieldType>
> 
> 
>  <fieldType name="IB" class="solr.TextField" positionIncrementGap="100"
>  autoGeneratePhraseQueries="false">
>       <analyzer type="index">
>         <tokenizer class="solr.ICUTokenizerFactory"/>
>         <filter class="solr.ICUFoldingFilterFactory"/>
>         <filter class="solr.CJKBigramFilterFactory"
>          han="true" hiragana="true"
>         katakana="false" hangul="false"   />
> 
> 
>         <filter class="solr.CommonGramsFilterFactory"
> words="1000common.txt" />
>       </analyzer>
>       <analyzer type="query">
>         <tokenizer class="solr.ICUTokenizerFactory"/>
> 
>         <filter class="solr.ICUFoldingFilterFactory"/>
>         <filter class="solr.CJKBigramFilterFactory"
>            han="true" hiragana="true"
>           katakana="false" hangul="false"   />
> 
>         <filter class="solr.CommonGramsQueryFilterFactory"
> words="1000common.txt" />
>       </analyzer>
>  <similarity class="solr.IBSimilarityFactory">
>       <str name="distribution">SPL</str>
>       <str name="lambda">DF</str>
>       <str name="normalization">H2</str>
>     </similarity>
> 
> 
>     </fieldType>
> 
> 
>  <fieldType name="BM25" class="solr.TextField" positionIncrementGap="100"
>  autoGeneratePhraseQueries="false">
>       <analyzer type="index">
>         <tokenizer class="solr.ICUTokenizerFactory"/>
>         <filter class="solr.ICUFoldingFilterFactory"/>
>         <filter class="solr.CJKBigramFilterFactory"
>          han="true" hiragana="true"
>         katakana="false" hangul="false"   />
> 
> 
>         <filter class="solr.CommonGramsFilterFactory"
> words="1000common.txt" />
>       </analyzer>
>       <analyzer type="query">
>         <tokenizer class="solr.ICUTokenizerFactory"/>
> 
>         <filter class="solr.ICUFoldingFilterFactory"/>
>         <filter class="solr.CJKBigramFilterFactory"
>            han="true" hiragana="true"
>           katakana="false" hangul="false"   />
> 
>         <filter class="solr.CommonGramsQueryFilterFactory"
> words="1000common.txt" />
>       </analyzer>
> 
> 
>  <similarity class="solr.BM25SimilarityFactory">
> <!-- start with the defaults  -->
>       <float name="k1">1.2</float>
>       <float name="b">0.75</float>
>     </similarity>
> 
>     </fieldType>
> 

Mime
View raw message