lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From yo...@apache.org
Subject svn commit: r354819 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/search/ src/test/org/apache/lucene/search/
Date Wed, 07 Dec 2005 17:48:56 GMT
Author: yonik
Date: Wed Dec  7 09:48:37 2005
New Revision: 354819

URL: http://svn.apache.org/viewcvs?rev=354819&view=rev
Log:
remove score normalization from expert level search: LUCENE-469

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/search/FieldSortedHitQueue.java
    lucene/java/trunk/src/java/org/apache/lucene/search/Hits.java
    lucene/java/trunk/src/java/org/apache/lucene/search/IndexSearcher.java
    lucene/java/trunk/src/java/org/apache/lucene/search/MultiSearcher.java
    lucene/java/trunk/src/java/org/apache/lucene/search/ParallelMultiSearcher.java
    lucene/java/trunk/src/java/org/apache/lucene/search/TopDocs.java
    lucene/java/trunk/src/java/org/apache/lucene/search/TopFieldDocs.java
    lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiSearcher.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/CHANGES.txt?rev=354819&r1=354818&r2=354819&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Wed Dec  7 09:48:37 2005
@@ -66,6 +66,11 @@
     instead of using Integer and Float classes for parsing.
     (Yonik Seeley via Otis Gospodnetic)
 
+ 9. Expert level search routines returning TopDocs and TopFieldDocs
+    no longer normalize scores.  This also fixes bugs related to
+    MultiSearchers and score sorting/normalization.
+    (Luc Vanlerberghe via Yonik Seeley, LUCENE-469)
+
 New features
 
  1. Added support for stored compressed fields (patch #31149)

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/FieldSortedHitQueue.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/FieldSortedHitQueue.java?rev=354819&r1=354818&r2=354819&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/FieldSortedHitQueue.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/FieldSortedHitQueue.java Wed Dec  7 09:48:37 2005
@@ -132,7 +132,7 @@
     for (int i=0; i<n; ++i)
       fields[i] = comparators[i].sortValue(doc);
     doc.fields = fields;
-    if (maxscore > 1.0f) doc.score /= maxscore;   // normalize scores
+    //if (maxscore > 1.0f) doc.score /= maxscore;   // normalize scores
     return doc;
   }
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/Hits.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/Hits.java?rev=354819&r1=354818&r2=354819&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/Hits.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/Hits.java Wed Dec  7 09:48:37 2005
@@ -67,8 +67,9 @@
     ScoreDoc[] scoreDocs = topDocs.scoreDocs;
 
     float scoreNorm = 1.0f;
-    if (length > 0 && scoreDocs[0].score > 1.0f) {
-      scoreNorm = 1.0f / scoreDocs[0].score;
+    
+    if (length > 0 && topDocs.getMaxScore() > 1.0f) {
+      scoreNorm = 1.0f / topDocs.getMaxScore();
     }
 
     int end = scoreDocs.length < length ? scoreDocs.length : length;

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/IndexSearcher.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/IndexSearcher.java?rev=354819&r1=354818&r2=354819&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/IndexSearcher.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/IndexSearcher.java Wed Dec  7 09:48:37 2005
@@ -97,7 +97,7 @@
 
     Scorer scorer = weight.scorer(reader);
     if (scorer == null)
-      return new TopDocs(0, new ScoreDoc[0]);
+      return new TopDocs(0, new ScoreDoc[0], Float.NEGATIVE_INFINITY);
 
     final BitSet bits = filter != null ? filter.bits(reader) : null;
     final HitQueue hq = new HitQueue(nDocs);
@@ -120,7 +120,9 @@
     for (int i = hq.size()-1; i >= 0; i--)        // put docs in array
       scoreDocs[i] = (ScoreDoc)hq.pop();
 
-    return new TopDocs(totalHits[0], scoreDocs);
+    float maxScore = (totalHits[0]==0) ? Float.NEGATIVE_INFINITY : scoreDocs[0].score;
+    
+    return new TopDocs(totalHits[0], scoreDocs, maxScore);
   }
 
   // inherit javadoc
@@ -129,7 +131,7 @@
       throws IOException {
     Scorer scorer = weight.scorer(reader);
     if (scorer == null)
-      return new TopFieldDocs(0, new ScoreDoc[0], sort.fields);
+      return new TopFieldDocs(0, new ScoreDoc[0], sort.fields, Float.NEGATIVE_INFINITY);
 
     final BitSet bits = filter != null ? filter.bits(reader) : null;
     final FieldSortedHitQueue hq =
@@ -149,7 +151,7 @@
     for (int i = hq.size()-1; i >= 0; i--)        // put docs in array
       scoreDocs[i] = hq.fillFields ((FieldDoc) hq.pop());
 
-    return new TopFieldDocs(totalHits[0], scoreDocs, hq.getFields());
+    return new TopFieldDocs(totalHits[0], scoreDocs, hq.getFields(), hq.getMaxScore());
   }
 
   // inherit javadoc

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/MultiSearcher.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/MultiSearcher.java?rev=354819&r1=354818&r2=354819&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/MultiSearcher.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/MultiSearcher.java Wed Dec  7 09:48:37 2005
@@ -208,8 +208,10 @@
     ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
     for (int i = hq.size()-1; i >= 0; i--)	  // put docs in array
       scoreDocs[i] = (ScoreDoc)hq.pop();
-
-    return new TopDocs(totalHits, scoreDocs);
+    
+    float maxScore = (totalHits==0) ? Float.NEGATIVE_INFINITY : scoreDocs[0].score;
+    
+    return new TopDocs(totalHits, scoreDocs, maxScore);
   }
 
   public TopFieldDocs search (Weight weight, Filter filter, int n, Sort sort)
@@ -217,10 +219,14 @@
     FieldDocSortedHitQueue hq = null;
     int totalHits = 0;
 
+    float maxScore=Float.NEGATIVE_INFINITY;
+    
     for (int i = 0; i < searchables.length; i++) { // search each searcher
       TopFieldDocs docs = searchables[i].search (weight, filter, n, sort);
+      
       if (hq == null) hq = new FieldDocSortedHitQueue (docs.fields, n);
       totalHits += docs.totalHits;		  // update totalHits
+      maxScore = Math.max(maxScore, docs.getMaxScore());
       ScoreDoc[] scoreDocs = docs.scoreDocs;
       for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq
         ScoreDoc scoreDoc = scoreDocs[j];
@@ -234,7 +240,7 @@
     for (int i = hq.size() - 1; i >= 0; i--)	  // put docs in array
       scoreDocs[i] = (ScoreDoc) hq.pop();
 
-    return new TopFieldDocs (totalHits, scoreDocs, hq.getFields());
+    return new TopFieldDocs (totalHits, scoreDocs, hq.getFields(), maxScore);
   }
 
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/ParallelMultiSearcher.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/ParallelMultiSearcher.java?rev=354819&r1=354818&r2=354819&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/ParallelMultiSearcher.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/ParallelMultiSearcher.java Wed Dec  7 09:48:37 2005
@@ -90,7 +90,9 @@
     for (int i = hq.size() - 1; i >= 0; i--) // put docs in array
       scoreDocs[i] = (ScoreDoc) hq.pop();
 
-    return new TopDocs(totalHits, scoreDocs);
+    float maxScore = (totalHits==0) ? Float.NEGATIVE_INFINITY : scoreDocs[0].score;
+    
+    return new TopDocs(totalHits, scoreDocs, maxScore);
   }
 
   /**
@@ -120,6 +122,8 @@
       msta[i].start();
     }
 
+    float maxScore=Float.NEGATIVE_INFINITY;
+    
     for (int i = 0; i < searchables.length; i++) {
       try {
         msta[i].join();
@@ -129,6 +133,7 @@
       IOException ioe = msta[i].getIOException();
       if (ioe == null) {
         totalHits += msta[i].hits();
+        maxScore=Math.max(maxScore, msta[i].getMaxScore());
       } else {
         // if one search produced an IOException, rethrow it
         throw ioe;
@@ -139,7 +144,7 @@
     for (int i = hq.size() - 1; i >= 0; i--) // put docs in array
       scoreDocs[i] = (ScoreDoc) hq.pop();
 
-    return new TopFieldDocs(totalHits, scoreDocs, hq.getFields());
+    return new TopFieldDocs(totalHits, scoreDocs, hq.getFields(), maxScore);
   }
 
   /** Lower-level search API.
@@ -274,6 +279,10 @@
     return docs.totalHits;
   }
 
+  public float getMaxScore() {
+      return docs.getMaxScore();
+  }
+  
   public IOException getIOException() {
     return ioe;
   }

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/TopDocs.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/TopDocs.java?rev=354819&r1=354818&r2=354819&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/TopDocs.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/TopDocs.java Wed Dec  7 09:48:37 2005
@@ -25,10 +25,23 @@
   public int totalHits;
   /** Expert: The top hits for the query. */
   public ScoreDoc[] scoreDocs;
-
+  /** Expert: Stores the maximum score value encountered, needed for normalizing. */
+  private float maxScore;
+  
+  /** Expert: Returns the maximum score value encountered. */
+  public float getMaxScore() {
+      return maxScore;
+  }
+  
+  /** Expert: Sets the maximum score value encountered. */
+  public void setMaxScore(float maxScore) {
+      this.maxScore=maxScore;
+  }
+  
   /** Expert: Constructs a TopDocs.*/
-  TopDocs(int totalHits, ScoreDoc[] scoreDocs) {
+  TopDocs(int totalHits, ScoreDoc[] scoreDocs, float maxScore) {
     this.totalHits = totalHits;
     this.scoreDocs = scoreDocs;
+    this.maxScore = maxScore;
   }
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/TopFieldDocs.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/TopFieldDocs.java?rev=354819&r1=354818&r2=354819&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/TopFieldDocs.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/TopFieldDocs.java Wed Dec  7 09:48:37 2005
@@ -32,14 +32,15 @@
 
 	/** The fields which were used to sort results by. */
 	public SortField[] fields;
-
+        
 	/** Creates one of these objects.
 	 * @param totalHits  Total number of hits for the query.
 	 * @param scoreDocs  The top hits for the query.
 	 * @param fields     The sort criteria used to find the top hits.
+	 * @param maxScore   The maximum score encountered.
 	 */
-	TopFieldDocs (int totalHits, ScoreDoc[] scoreDocs, SortField[] fields) {
-	  super (totalHits, scoreDocs);
+	TopFieldDocs (int totalHits, ScoreDoc[] scoreDocs, SortField[] fields, float maxScore) {
+	  super (totalHits, scoreDocs, maxScore);
 	  this.fields = fields;
 	}
 }

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiSearcher.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiSearcher.java?rev=354819&r1=354818&r2=354819&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiSearcher.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiSearcher.java Wed Dec  7 09:48:37 2005
@@ -17,6 +17,7 @@
  */
 
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.KeywordAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
@@ -180,5 +181,114 @@
             Document d = hits3.doc(i);
         }
         mSearcher3.close();
+    }
+    
+    private static Document createDocument(String contents1, String contents2) {
+        Document document=new Document();
+        
+        document.add(new Field("contents", contents1, Field.Store.YES, Field.Index.UN_TOKENIZED));
+        
+        if (contents2!=null) {
+            document.add(new Field("contents", contents2, Field.Store.YES, Field.Index.UN_TOKENIZED));
+        }
+        
+        return document;
+    }
+    
+    private static void initIndex(Directory directory, int nDocs, boolean create, String contents2) throws IOException {
+        IndexWriter indexWriter=null;
+        
+        try {
+            indexWriter=new IndexWriter(directory, new KeywordAnalyzer(), create);
+            
+            for (int i=0; i<nDocs; i++) {
+                indexWriter.addDocument(createDocument("doc" + i, contents2));
+            }
+        } finally {
+            if (indexWriter!=null) {
+                indexWriter.close();
+            }
+        }
+    }
+    
+    /* uncomment this when the highest score is always normalized to 1.0, even when it was < 1.0
+    public void testNormalization1() throws IOException {
+        testNormalization(1, "Using 1 document per index:");
+    }
+     */
+    
+    public void testNormalization10() throws IOException {
+        testNormalization(10, "Using 10 documents per index:");
+    }
+    
+    private void testNormalization(int nDocs, String message) throws IOException {
+        Query query=new TermQuery(new Term("contents", "doc0"));
+        
+        RAMDirectory ramDirectory1;
+        IndexSearcher indexSearcher1;
+        Hits hits;
+        
+        ramDirectory1=new RAMDirectory();
+        
+        // First put the documents in the same index
+        initIndex(ramDirectory1, nDocs, true, null); // documents with a single token "doc0", "doc1", etc...
+        initIndex(ramDirectory1, nDocs, false, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
+        
+        indexSearcher1=new IndexSearcher(ramDirectory1);
+        
+        hits=indexSearcher1.search(query);
+        
+        assertEquals(message, 2, hits.length());
+        
+        assertEquals(message, 1, hits.score(0), 1e-6); // hits.score(0) is 0.594535 if only a single document is in first index
+        
+        // Store the scores for use later
+        float[] scores={ hits.score(0), hits.score(1) };
+        
+        assertTrue(message, scores[0] > scores[1]);
+        
+        indexSearcher1.close();
+        ramDirectory1.close();
+        hits=null;
+        
+        
+        
+        RAMDirectory ramDirectory2;
+        IndexSearcher indexSearcher2;
+        
+        ramDirectory1=new RAMDirectory();
+        ramDirectory2=new RAMDirectory();
+        
+        // Now put the documents in a different index
+        initIndex(ramDirectory1, nDocs, true, null); // documents with a single token "doc0", "doc1", etc...
+        initIndex(ramDirectory2, nDocs, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
+        
+        indexSearcher1=new IndexSearcher(ramDirectory1);
+        indexSearcher2=new IndexSearcher(ramDirectory2);
+        
+        Searcher searcher=getMultiSearcherInstance(new Searcher[] { indexSearcher1, indexSearcher2 });
+        
+        hits=searcher.search(query);
+        
+        assertEquals(message, 2, hits.length());
+        
+        // The scores should be the same (within reason)
+        assertEquals(message, scores[0], hits.score(0), 1e-6); // This will a document from ramDirectory1
+        assertEquals(message, scores[1], hits.score(1), 1e-6); // This will a document from ramDirectory2
+        
+        
+        
+        // Adding a Sort.RELEVANCE object should not change anything
+        hits=searcher.search(query, Sort.RELEVANCE);
+        
+        assertEquals(message, 2, hits.length());
+        
+        assertEquals(message, scores[0], hits.score(0), 1e-6); // This will a document from ramDirectory1
+        assertEquals(message, scores[1], hits.score(1), 1e-6); // This will a document from ramDirectory2
+        
+        searcher.close();
+        
+        ramDirectory1.close();
+        ramDirectory2.close();
     }
 }



Mime
View raw message