lucene-solr-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Pigeyre Romain <romain.pige...@sopra.com>
Subject Scoring with wildcards
Date Wed, 24 Sep 2014 18:12:33 GMT
Hi,

I have two records with a name_FRA field:
One with name_FRA="un test CARREAU"
And another one with name_FRA="un test CARRE"

{
        "codeBarre": "1",
        "name_FRA": "un test CARREAU"
      }
{
        "codeBarre": "2",
        "name_FRA": "un test CARRE"
      }

The configuration of these fields is:

<field name="name_FRA" type="text_general" indexed="true" stored="true" required="false"
multiValued="false" />
<field name="codeBarre" type="string" indexed="true" stored="true" required="true" multiValued="false"
/>
<field name="text" type="text_general" indexed="true" stored="false" multiValued="true"
/>
<copyField source="name_FRA" dest="text"/>

<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"
/>
        <!-- in this example, we will only use synonyms at query time
        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true"
expand="false"/>
        -->
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ASCIIFoldingFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"
/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true"
expand="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ASCIIFoldingFilterFactory"/>
      </analyzer>
    </fieldType>

When I'm using this query :
http://localhost:8983/solr/cdv_product/select?q=text%3Acarre*&fl=score%2C+*&wt=json&indent=true&debugQuery=true
The result is :
{
  "responseHeader":{
    "status":0,
    "QTime":2,
    "params":{
      "debugQuery":"true",
      "fl":"score, *",
      "indent":"true",
      "q":"text:carre*",
      "wt":"json"}},
  "response":{"numFound":2,"start":0,"maxScore":1.0,"docs":[
      {
       "codeBarre":"1",
        "name_FRA":"un test CARREAU",
        "_version_":1480150860842401792,
        "score":1.0},
      {
        "codeBarre":"2",
        "name_FRA":"un test CARRE",
        "_version_":1480150875738472448,
        "score":1.0}]
  },
  "debug":{
    "rawquerystring":"text:carre*",
    "querystring":"text:carre*",
    "parsedquery":"text:carre*",
    "parsedquery_toString":"text:carre*",
    "explain":{
      "1":"\n1.0 = (MATCH) ConstantScore(text:carre*), product of:\n  1.0 = boost\n  1.0 =
queryNorm\n",
      "2":"\n1.0 = (MATCH) ConstantScore(text:carre*), product of:\n  1.0 = boost\n  1.0 =
queryNorm\n"},
    "QParser":"LuceneQParser",
    "timing":{
      "time":2.0,
      "prepare":{
        "time":1.0,
        "query":{
          "time":1.0},
        "facet":{
          "time":0.0},
        "mlt":{
          "time":0.0},
        "highlight":{
          "time":0.0},
        "stats":{
          "time":0.0},
        "expand":{
          "time":0.0},
        "debug":{
          "time":0.0}},
      "process":{
        "time":1.0,
        "query":{
          "time":0.0},
        "facet":{
          "time":0.0},
        "mlt":{
          "time":0.0},
        "highlight":{
          "time":0.0},
        "stats":{
          "time":0.0},
        "expand":{
          "time":0.0},
        "debug":{
          "time":1.0}}}}}

The score is the same for both records. The CARREAU record comes first and the CARRE record
comes next. I want to place the CARRE result before the CARREAU result because CARRE is an
exact match. Is it possible?

NB: the scoring for this query only uses the boost and queryNorm (it is a constant score).

In this test :
http://localhost:8983/solr/cdv_product/select?q=text%3Acarre&fl=score%2C*&wt=json&indent=true&debugQuery=true

Only one record is found, but the scoring is more complex. Why?

{

  "responseHeader":{

    "status":0,

    "QTime":2,

    "params":{

      "debugQuery":"true",

      "fl":"score,*",

      "indent":"true",

      "q":"text:carre",

      "wt":"json"}},

  "response":{"numFound":1,"start":0,"maxScore":0.53033006,"docs":[

      {

        "codeBarre":"2",

        "name_FRA":"un test CARRE",

        "_version_":1480150875738472448,

        "score":0.53033006}]

  },

  "debug":{

    "rawquerystring":"text:carre",

    "querystring":"text:carre",

    "parsedquery":"text:carre",

    "parsedquery_toString":"text:carre",

    "explain":{

      "2":"\n0.53033006 = (MATCH) weight(text:carre in 0) [DefaultSimilarity], result of:\n
 0.53033006 = fieldWeight in 0, product of:\n    1.4142135 = tf(freq=2.0), with freq of:\n
     2.0 = termFreq=2.0\n    1.0 = idf(docFreq=1, maxDocs=2)\n    0.375 = fieldNorm(doc=0)\n"},

    "QParser":"LuceneQParser",

    "timing":{

      "time":2.0,

      "prepare":{

        "time":1.0,

        "query":{

          "time":1.0},

        "facet":{

          "time":0.0},

        "mlt":{

          "time":0.0},

        "highlight":{

          "time":0.0},

        "stats":{

          "time":0.0},

        "expand":{

          "time":0.0},

        "debug":{

          "time":0.0}},

      "process":{

        "time":1.0,

        "query":{

          "time":0.0},

        "facet":{

          "time":0.0},

        "mlt":{

          "time":0.0},

        "highlight":{

          "time":0.0},

        "stats":{

          "time":0.0},

        "expand":{

          "time":0.0},

        "debug":{

          "time":1.0}}}}}





Romain PIGEYRE
Centre de service de Lyon

[Sopra]

Sopra
Parc du Puy d'Or
72 Allée des Noisetiers - CS 10137
69578 - LIMONEST
France
Phone : +33 (0)4 37 26 43 33
romain.pigeyre@sopra.com<mailto:romain.pigeyre@sopra.com> - www.sopra.com<http://www.sopra.com>


[cid:image004.png@01CFD833.DFE6CB90]<http://www.linkedin.com/company/sopra> [cid:image006.png@01CFD833.DFE6CB90]
<https://www.youtube.com/user/SopraChannel>  [cid:image008.png@01CFD833.DFE6CB90] <https://www.facebook.com/sopragroup>
 [cid:image010.png@01CFD833.DFE6CB90] <https://twitter.com/soprarh>  [cid:image012.png@01CFD833.DFE6CB90]
<http://fr.viadeo.com/fr/company/sopra>
Ce message peut contenir des informations confidentielles dont la divulgation est à ce titre
rigoureusement interdite en l'absence d'autorisation explicite de l'émetteur. Dans l'hypothèse
où vous auriez reçu par erreur ce message, merci de le renvoyer à l'émetteur et de détruire
toute copie.

P Pensez à l'environnement avant d'imprimer.


Mime
  • Unnamed multipart/related (inline, None, 0 bytes)
View raw message