lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Ian Lea <ian....@gmail.com>
Subject Re: relevant score calculation
Date Wed, 29 Dec 2010 10:44:55 GMT
Some of the factors that go into the score calculation are encoded as
a single byte, with inevitable loss of precision.  The field length norm
is probably one of these, so Lucene is not differentiating between your
3- and 4-word docs (the explain output below shows fieldNorm = 0.5 for
all three).  Try indexing a document that is significantly longer than
3 or 4 words.

Further reading: http://lucene.apache.org/java/3_0_3/scoring.html, the
javadocs for Similarity and DefaultSimilarity, and whatever Google finds.


--
Ian.


On Tue, Dec 28, 2010 at 8:11 PM, Qi Li <alertli@gmail.com> wrote:
> Happy Holidays !
>
> Test case
>    doc1 :   test -- one two three
>    doc2 :   test, one two three
>    doc3 :   one two three
>
> Search query :  "one two three" by QueryParser and StandardAnalyzer
>
> Question:  why do all three documents have the same score?  I really want
> doc3 to have a higher score because it is an exact match and short.   Can
> anybody explain this?  I would appreciate it a lot.
>
> Here is my code and its output
>
> public class Test {
>
>    public static void main(String[] args){
>        test();
>    }
>
>    private static void test(){
>        String[] contents = {"test -- one two three",
>                             "test, one two three",
>                             "one two three"};
>
>        Directory dir = new RAMDirectory();
>        try {
>            IndexWriter writer = new IndexWriter(dir, new
> StandardAnalyzer(Version.LUCENE_30), IndexWriter.MaxFieldLength.UNLIMITED);
>            for (int i=0; i<contents.length; i++){
>                Document doc = new Document();
>                doc.add(new Field("de", contents[i], Field.Store.YES,
> Field.Index.ANALYZED));
>                writer.addDocument(doc);
>            }
>            writer.close();
>
>            IndexSearcher searcher = new IndexSearcher(dir);
>            QueryParser parser = new QueryParser(Version.LUCENE_30,"de", new
> StandardAnalyzer(Version.LUCENE_30));
>
>            Query q = parser.parse("one two three");
>            TopDocs topDocs = searcher.search(q, 10);
>            for (ScoreDoc scoreDoc : topDocs.scoreDocs){
>                Document doc = searcher.doc(scoreDoc.doc);
>                System.out.println(doc.get("de"));
>                Explanation explan = searcher.explain(q, scoreDoc.doc);
>                System.out.println(explan.toString());
>            }
>
>        } catch (CorruptIndexException e) {
>            e.printStackTrace();
>        } catch (LockObtainFailedException e) {
>            e.printStackTrace();
>        } catch (ParseException e) {
>            e.printStackTrace();
>        } catch (IOException e) {
>            e.printStackTrace();
>        }
>    }
> }
>
>
> test -- one two three
> 0.6168854 = (MATCH) sum of:
>  0.20562847 = (MATCH) weight(de:one in 0), product of:
>    0.57735026 = queryWeight(de:one), product of:
>      0.71231794 = idf(docFreq=3, maxDocs=3)
>      0.8105233 = queryNorm
>    0.35615897 = (MATCH) fieldWeight(de:one in 0), product of:
>      1.0 = tf(termFreq(de:one)=1)
>      0.71231794 = idf(docFreq=3, maxDocs=3)
>      0.5 = fieldNorm(field=de, doc=0)
>  0.20562847 = (MATCH) weight(de:two in 0), product of:
>    0.57735026 = queryWeight(de:two), product of:
>      0.71231794 = idf(docFreq=3, maxDocs=3)
>      0.8105233 = queryNorm
>    0.35615897 = (MATCH) fieldWeight(de:two in 0), product of:
>      1.0 = tf(termFreq(de:two)=1)
>      0.71231794 = idf(docFreq=3, maxDocs=3)
>      0.5 = fieldNorm(field=de, doc=0)
>  0.20562847 = (MATCH) weight(de:three in 0), product of:
>    0.57735026 = queryWeight(de:three), product of:
>      0.71231794 = idf(docFreq=3, maxDocs=3)
>      0.8105233 = queryNorm
>    0.35615897 = (MATCH) fieldWeight(de:three in 0), product of:
>      1.0 = tf(termFreq(de:three)=1)
>      0.71231794 = idf(docFreq=3, maxDocs=3)
>      0.5 = fieldNorm(field=de, doc=0)
>
> test, one two three
> 0.6168854 = (MATCH) sum of:
>  0.20562847 = (MATCH) weight(de:one in 1), product of:
>    0.57735026 = queryWeight(de:one), product of:
>      0.71231794 = idf(docFreq=3, maxDocs=3)
>      0.8105233 = queryNorm
>    0.35615897 = (MATCH) fieldWeight(de:one in 1), product of:
>      1.0 = tf(termFreq(de:one)=1)
>      0.71231794 = idf(docFreq=3, maxDocs=3)
>      0.5 = fieldNorm(field=de, doc=1)
>  0.20562847 = (MATCH) weight(de:two in 1), product of:
>    0.57735026 = queryWeight(de:two), product of:
>      0.71231794 = idf(docFreq=3, maxDocs=3)
>      0.8105233 = queryNorm
>    0.35615897 = (MATCH) fieldWeight(de:two in 1), product of:
>      1.0 = tf(termFreq(de:two)=1)
>      0.71231794 = idf(docFreq=3, maxDocs=3)
>      0.5 = fieldNorm(field=de, doc=1)
>  0.20562847 = (MATCH) weight(de:three in 1), product of:
>    0.57735026 = queryWeight(de:three), product of:
>      0.71231794 = idf(docFreq=3, maxDocs=3)
>      0.8105233 = queryNorm
>    0.35615897 = (MATCH) fieldWeight(de:three in 1), product of:
>      1.0 = tf(termFreq(de:three)=1)
>      0.71231794 = idf(docFreq=3, maxDocs=3)
>      0.5 = fieldNorm(field=de, doc=1)
>
> one two three
> 0.6168854 = (MATCH) sum of:
>  0.20562847 = (MATCH) weight(de:one in 2), product of:
>    0.57735026 = queryWeight(de:one), product of:
>      0.71231794 = idf(docFreq=3, maxDocs=3)
>      0.8105233 = queryNorm
>    0.35615897 = (MATCH) fieldWeight(de:one in 2), product of:
>      1.0 = tf(termFreq(de:one)=1)
>      0.71231794 = idf(docFreq=3, maxDocs=3)
>      0.5 = fieldNorm(field=de, doc=2)
>  0.20562847 = (MATCH) weight(de:two in 2), product of:
>    0.57735026 = queryWeight(de:two), product of:
>      0.71231794 = idf(docFreq=3, maxDocs=3)
>      0.8105233 = queryNorm
>    0.35615897 = (MATCH) fieldWeight(de:two in 2), product of:
>      1.0 = tf(termFreq(de:two)=1)
>      0.71231794 = idf(docFreq=3, maxDocs=3)
>      0.5 = fieldNorm(field=de, doc=2)
>  0.20562847 = (MATCH) weight(de:three in 2), product of:
>    0.57735026 = queryWeight(de:three), product of:
>      0.71231794 = idf(docFreq=3, maxDocs=3)
>      0.8105233 = queryNorm
>    0.35615897 = (MATCH) fieldWeight(de:three in 2), product of:
>      1.0 = tf(termFreq(de:three)=1)
>      0.71231794 = idf(docFreq=3, maxDocs=3)
>      0.5 = fieldNorm(field=de, doc=2)
>
> Best regards,
> Qi Li
>

---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org


Mime
View raw message