lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Qi Li <aler...@gmail.com>
Subject relevant score calculation
Date Tue, 28 Dec 2010 20:11:06 GMT
Happy Holidays !

Test case
    doc1 :   test -- one two three
    doc2 :   test, one two three
    doc3 :   one two three

Search query :  "one two three" by QueryParser and StandardAnalyzer

Question:  why do all three documents have the same score?  I would like
doc3 to have a higher score because it is an exact match and is shorter.
Can anybody explain this?  I would appreciate it a lot.

Here is my code and its output

/**
 * Minimal reproduction for comparing Lucene scores: indexes three short
 * strings into an in-memory index under the field {@code "de"}, searches
 * them with a {@link QueryParser}-built query, and prints each hit together
 * with its score {@link Explanation}.
 */
public class Test {

    /** Entry point; runs the index-and-search experiment once. */
    public static void main(String[] args) {
        test();
    }

    /**
     * Builds the in-memory index, runs the query and prints, for every hit,
     * the stored field value followed by the scoring explanation.
     *
     * <p>Failures are reported via {@code printStackTrace()} and otherwise
     * ignored, so this method never throws a checked exception.
     */
    private static void test() {
        String[] contents = {"test -- one two three",
                             "test, one two three",
                             "one two three"};

        Directory dir = new RAMDirectory();
        try {
            IndexWriter writer = new IndexWriter(dir,
                    new StandardAnalyzer(Version.LUCENE_30),
                    IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                for (String content : contents) {
                    Document doc = new Document();
                    doc.add(new Field("de", content, Field.Store.YES,
                            Field.Index.ANALYZED));
                    writer.addDocument(doc);
                }
            } finally {
                // Close even when addDocument fails, so the writer is not
                // left open on the error path.
                writer.close();
            }

            IndexSearcher searcher = new IndexSearcher(dir);
            try {
                QueryParser parser = new QueryParser(Version.LUCENE_30, "de",
                        new StandardAnalyzer(Version.LUCENE_30));

                // NOTE(review): the query text contains no quote characters,
                // so it is presumably parsed as three independent terms and
                // not as a phrase -- confirm against the QueryParser syntax
                // documentation.
                Query q = parser.parse("one two three");
                TopDocs topDocs = searcher.search(q, 10);
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    Document doc = searcher.doc(scoreDoc.doc);
                    System.out.println(doc.get("de"));
                    Explanation explan = searcher.explain(q, scoreDoc.doc);
                    System.out.println(explan.toString());
                }
            } finally {
                // The searcher was never closed in the original code.
                searcher.close();
            }
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (IOException e) {
            // Also covers CorruptIndexException and
            // LockObtainFailedException, which are IOException subclasses.
            e.printStackTrace();
        }
    }
}


test -- one two three
0.6168854 = (MATCH) sum of:
  0.20562847 = (MATCH) weight(de:one in 0), product of:
    0.57735026 = queryWeight(de:one), product of:
      0.71231794 = idf(docFreq=3, maxDocs=3)
      0.8105233 = queryNorm
    0.35615897 = (MATCH) fieldWeight(de:one in 0), product of:
      1.0 = tf(termFreq(de:one)=1)
      0.71231794 = idf(docFreq=3, maxDocs=3)
      0.5 = fieldNorm(field=de, doc=0)
  0.20562847 = (MATCH) weight(de:two in 0), product of:
    0.57735026 = queryWeight(de:two), product of:
      0.71231794 = idf(docFreq=3, maxDocs=3)
      0.8105233 = queryNorm
    0.35615897 = (MATCH) fieldWeight(de:two in 0), product of:
      1.0 = tf(termFreq(de:two)=1)
      0.71231794 = idf(docFreq=3, maxDocs=3)
      0.5 = fieldNorm(field=de, doc=0)
  0.20562847 = (MATCH) weight(de:three in 0), product of:
    0.57735026 = queryWeight(de:three), product of:
      0.71231794 = idf(docFreq=3, maxDocs=3)
      0.8105233 = queryNorm
    0.35615897 = (MATCH) fieldWeight(de:three in 0), product of:
      1.0 = tf(termFreq(de:three)=1)
      0.71231794 = idf(docFreq=3, maxDocs=3)
      0.5 = fieldNorm(field=de, doc=0)

test, one two three
0.6168854 = (MATCH) sum of:
  0.20562847 = (MATCH) weight(de:one in 1), product of:
    0.57735026 = queryWeight(de:one), product of:
      0.71231794 = idf(docFreq=3, maxDocs=3)
      0.8105233 = queryNorm
    0.35615897 = (MATCH) fieldWeight(de:one in 1), product of:
      1.0 = tf(termFreq(de:one)=1)
      0.71231794 = idf(docFreq=3, maxDocs=3)
      0.5 = fieldNorm(field=de, doc=1)
  0.20562847 = (MATCH) weight(de:two in 1), product of:
    0.57735026 = queryWeight(de:two), product of:
      0.71231794 = idf(docFreq=3, maxDocs=3)
      0.8105233 = queryNorm
    0.35615897 = (MATCH) fieldWeight(de:two in 1), product of:
      1.0 = tf(termFreq(de:two)=1)
      0.71231794 = idf(docFreq=3, maxDocs=3)
      0.5 = fieldNorm(field=de, doc=1)
  0.20562847 = (MATCH) weight(de:three in 1), product of:
    0.57735026 = queryWeight(de:three), product of:
      0.71231794 = idf(docFreq=3, maxDocs=3)
      0.8105233 = queryNorm
    0.35615897 = (MATCH) fieldWeight(de:three in 1), product of:
      1.0 = tf(termFreq(de:three)=1)
      0.71231794 = idf(docFreq=3, maxDocs=3)
      0.5 = fieldNorm(field=de, doc=1)

one two three
0.6168854 = (MATCH) sum of:
  0.20562847 = (MATCH) weight(de:one in 2), product of:
    0.57735026 = queryWeight(de:one), product of:
      0.71231794 = idf(docFreq=3, maxDocs=3)
      0.8105233 = queryNorm
    0.35615897 = (MATCH) fieldWeight(de:one in 2), product of:
      1.0 = tf(termFreq(de:one)=1)
      0.71231794 = idf(docFreq=3, maxDocs=3)
      0.5 = fieldNorm(field=de, doc=2)
  0.20562847 = (MATCH) weight(de:two in 2), product of:
    0.57735026 = queryWeight(de:two), product of:
      0.71231794 = idf(docFreq=3, maxDocs=3)
      0.8105233 = queryNorm
    0.35615897 = (MATCH) fieldWeight(de:two in 2), product of:
      1.0 = tf(termFreq(de:two)=1)
      0.71231794 = idf(docFreq=3, maxDocs=3)
      0.5 = fieldNorm(field=de, doc=2)
  0.20562847 = (MATCH) weight(de:three in 2), product of:
    0.57735026 = queryWeight(de:three), product of:
      0.71231794 = idf(docFreq=3, maxDocs=3)
      0.8105233 = queryNorm
    0.35615897 = (MATCH) fieldWeight(de:three in 2), product of:
      1.0 = tf(termFreq(de:three)=1)
      0.71231794 = idf(docFreq=3, maxDocs=3)
      0.5 = fieldNorm(field=de, doc=2)

Best regards,
Qi Li

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message