lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Christoph Goller <gol...@detego-software.de>
Subject Re: cvs commit: jakarta-lucene/src/java/org/apache/lucene/search IndexSearcher.java
Date Tue, 28 Sep 2004 10:30:52 GMT
Hi Doug,

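I think this change is not correct in the FieldDoc case. When results are
sorted by fields, the score is not necessarily the criterion that decides
whether a FieldDoc is inserted into the queue! The FieldSortedHitQueue
orders hits by the sort fields, so a hit whose score is below minScore may
still belong among the top nDocs and must not be skipped.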

Christoph

cutting@apache.org wrote:
> cutting     2004/09/22 10:03:00
> 
>   Modified:    src/java/org/apache/lucene/search IndexSearcher.java
>   Log:
>   Do not construct a ScoreDoc for every non-zero hit, but only for those
>   in the current top scoring set.  This makes a substantial performance
>   improvement for queries that match lots of documents.
>   
>   Revision  Changes    Path
>   1.20      +26 -18    jakarta-lucene/src/java/org/apache/lucene/search/IndexSearcher.java
>   
>   Index: IndexSearcher.java
>   ===================================================================
>   RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/IndexSearcher.java,v
>   retrieving revision 1.19
>   retrieving revision 1.20
>   diff -u -r1.19 -r1.20
>   --- IndexSearcher.java	19 May 2004 23:05:27 -0000	1.19
>   +++ IndexSearcher.java	22 Sep 2004 17:03:00 -0000	1.20
>   @@ -90,17 +90,21 @@
>        final HitQueue hq = new HitQueue(nDocs);
>        final int[] totalHits = new int[1];
>        scorer.score(new HitCollector() {
>   -	public final void collect(int doc, float score) {
>   -	  if (score > 0.0f &&			  // ignore zeroed buckets
>   -	      (bits==null || bits.get(doc))) {	  // skip docs not in bits
>   -	    totalHits[0]++;
>   -            hq.insert(new ScoreDoc(doc, score));
>   -	  }
>   -	}
>   +        private float minScore = 0.0f;
>   +        public final void collect(int doc, float score) {
>   +          if (score > 0.0f &&                     // ignore zeroed buckets
>   +              (bits==null || bits.get(doc))) {    // skip docs not in bits
>   +            totalHits[0]++;
>   +            if (hq.size() < nDocs || score >= minScore) {
>   +              hq.insert(new ScoreDoc(doc, score));
>   +              minScore = ((ScoreDoc)hq.top()).score; // maintain minScore
>   +            }
>   +          }
>   +        }
>          });
>    
>        ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
>   -    for (int i = hq.size()-1; i >= 0; i--)	  // put docs in array
>   +    for (int i = hq.size()-1; i >= 0; i--)        // put docs in array
>          scoreDocs[i] = (ScoreDoc)hq.pop();
>    
>        return new TopDocs(totalHits[0], scoreDocs);
>   @@ -119,17 +123,21 @@
>          new FieldSortedHitQueue(reader, sort.fields, nDocs);
>        final int[] totalHits = new int[1];
>        scorer.score(new HitCollector() {
>   +        private float minScore = 0.0f;
>            public final void collect(int doc, float score) {
>   -          if (score > 0.0f &&			  // ignore zeroed buckets
>   -              (bits==null || bits.get(doc))) {	  // skip docs not in bits
>   +          if (score > 0.0f &&                     // ignore zeroed buckets
>   +              (bits==null || bits.get(doc))) {    // skip docs not in bits
>                totalHits[0]++;
>   -            hq.insert(new FieldDoc(doc, score));
>   +            if (hq.size() < nDocs || score >= minScore) {
>   +              hq.insert(new FieldDoc(doc, score));
>   +              minScore = ((FieldDoc)hq.top()).score; // maintain minScore
>   +            }
>              }
>            }
>          });
>    
>        ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
>   -    for (int i = hq.size()-1; i >= 0; i--)	  // put docs in array
>   +    for (int i = hq.size()-1; i >= 0; i--)        // put docs in array
>          scoreDocs[i] = hq.fillFields ((FieldDoc) hq.pop());
>    
>        return new TopFieldDocs(totalHits[0], scoreDocs, hq.getFields());
>   @@ -143,12 +151,12 @@
>        if (filter != null) {
>          final BitSet bits = filter.bits(reader);
>          collector = new HitCollector() {
>   -	  public final void collect(int doc, float score) {
>   -	    if (bits.get(doc)) {		  // skip docs not in bits
>   -	      results.collect(doc, score);
>   -	    }
>   -	  }
>   -	};
>   +          public final void collect(int doc, float score) {
>   +            if (bits.get(doc)) {                  // skip docs not in bits
>   +              results.collect(doc, score);
>   +            }
>   +          }
>   +        };
>        }
>    
>        Scorer scorer = query.weight(this).scorer(reader);
>   
>   
>   
> 
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
> For additional commands, e-mail: lucene-dev-help@jakarta.apache.org
> 
> 

-- 
*************************************************************
* Dr. Christoph Goller     Tel. : +49 89 203 45734          *
* Geschäftsführer          Email: goller@detego-software.de *
* Detego Software GmbH     Mail : Keuslinstr. 13,           *
*                                 80798 München, Germany    *
*************************************************************


---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message