lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Doug Cutting <cutt...@lucene.com>
Subject Re: MultiSearcher discards interim results
Date Fri, 07 Feb 2003 18:58:22 GMT
I'm confused.  The contract of this method is to return the top-scoring 
nDocs.  For a multi-searcher it must compute the top-scoring nDocs from 
each sub-searcher, then find the top-scoring nDocs among these.  If you 
want more of the top-scoring documents, just pass in a larger value for 
nDocs.  If you want all of the matching documents, pass 
Integer.MAX_VALUE.  How does this not meet your needs?

Doug

Ype Kingma wrote:
> Dear developers,
> 
> public TopDocs search(Query query, Filter filter, int nDocs)
> contains an
> else break; 
> which discards previous interim results. 
> 
> Since I expect to need in the order of 100 best results from
> 20 databases on a regular basis I don't really like this.
> 
> This is the current code:
> 
>     for (int i = 0; i < searchables.length; i++) { // search each searcher
>       TopDocs docs = searchables[i].search(query, filter, nDocs);
>       totalHits += docs.totalHits;		  // update totalHits
>       ScoreDoc[] scoreDocs = docs.scoreDocs;
>       for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq
> 	ScoreDoc scoreDoc = scoreDocs[j];
> 	if (scoreDoc.score >= minScore) {
> 	  scoreDoc.doc += starts[i];		  // convert doc
> 	  hq.put(scoreDoc);			  // update hit queue
> 	  if (hq.size() > nDocs) {		  // if hit queue overfull
> 	    hq.pop();				  // remove lowest in hit queue
> 	    minScore = ((ScoreDoc)hq.top()).score; // reset minScore
> 	  }
> 	} else
> 	  break;				  // no more scores > minScore
>       }
>     }
> 
> 
> Attached is an untested patch for this. It works by implementing
> a MultiCollector that has the state to collect results from
> the subsearchers without discarding interim results.
> The patch is a diff -c against current CVS.
> 
> I'd like to add some test cases, but before I do that
> I'd prefer to have comments.
> 
> I checked the testcases for MultiSearcher, but they don't
> seem to exercise the code in the patch.
> The existing test-unit build runs fine with the patch.
> 
> Regards,
> Ype
> 
> 
> ------------------------------------------------------------------------
> 
> Index: jakarta-lucene/src/java/org/apache/lucene/search/MultiSearcher.java
> ===================================================================
> RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/MultiSearcher.java,v
> retrieving revision 1.10
> diff -c -r1.10 MultiSearcher.java
> *** jakarta-lucene/src/java/org/apache/lucene/search/MultiSearcher.java	29 Jan 2003 17:18:54 -0000	1.10
> --- jakarta-lucene/src/java/org/apache/lucene/search/MultiSearcher.java	3 Feb 2003 22:43:35 -0000
> ***************
> *** 141,175 ****
>       return maxDoc;
>     }
>   
>     public TopDocs search(Query query, Filter filter, int nDocs)
>         throws IOException {
> !     HitQueue hq = new HitQueue(nDocs);
> !     float minScore = 0.0f;
> !     int totalHits = 0;
> ! 
> !     for (int i = 0; i < searchables.length; i++) { // search each searcher
> !       TopDocs docs = searchables[i].search(query, filter, nDocs);
> !       totalHits += docs.totalHits;		  // update totalHits
> !       ScoreDoc[] scoreDocs = docs.scoreDocs;
> !       for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq
> ! 	ScoreDoc scoreDoc = scoreDocs[j];
> ! 	if (scoreDoc.score >= minScore) {
> ! 	  scoreDoc.doc += starts[i];		  // convert doc
> ! 	  hq.put(scoreDoc);			  // update hit queue
> ! 	  if (hq.size() > nDocs) {		  // if hit queue overfull
> ! 	    hq.pop();				  // remove lowest in hit queue
> ! 	    minScore = ((ScoreDoc)hq.top()).score; // reset minScore
>   	  }
> ! 	} else
> ! 	  break;				  // no more scores > minScore
>         }
>       }
>   
>       ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
> !     for (int i = hq.size()-1; i >= 0; i--)	  // put docs in array
>         scoreDocs[i] = (ScoreDoc)hq.pop();
>   
> !     return new TopDocs(totalHits, scoreDocs);
>     }
>   
>   
> --- 141,198 ----
>       return maxDoc;
>     }
>   
> + 
>     public TopDocs search(Query query, Filter filter, int nDocs)
>         throws IOException {
> ! 
> !     class MultiCollector extends HitCollector {
> !       HitQueue hq;
> !       int nDocs = 0;
> !       int totalHits = 0;
> !       int start = 0;
> !       float minScore = 0.0f;
> !       ScoreDoc scoreDoc = null; /* reuse last one discarded from hitqueue hq */
> ! 
> !       public MultiCollector(int nd) {
> !         nDocs = nd;
> ! 	hq = new HitQueue(nd);
> !       }
> ! 
> !       public void collect(int doc, float score) {
> !         totalHits++;
> !         System.out.println(getClass() + " hits: " + totalHits + ", start: " + start
> !                                + ", docNr: " + doc + ", score: " + score);
> !         if (score >= minScore) {
> ! 	  if (scoreDoc == null) {
> ! 	    scoreDoc = new ScoreDoc(doc + start, score);
> ! 	  } else {
> ! 	    scoreDoc.doc = doc + start;
> ! 	    scoreDoc.score = score;
> ! 	  }
> !           hq.put(scoreDoc);
> ! 	  if (hq.size() > nDocs) {
> ! 	    scoreDoc = (ScoreDoc) hq.pop();
> ! 	    minScore = ((ScoreDoc)hq.top()).score;
> ! 	  } else {
> ! 	    scoreDoc = null;
>   	  }
> ! 	}
>         }
>       }
>   
> +     MultiCollector mc = new MultiCollector(nDocs);
> + 
> +     for (int i = 0; i < searchables.length; i++) {
> +       mc.start = starts[i];
> +       searchables[i].search(query, filter, mc);
> +     }
> + 
> +     HitQueue hq = mc.hq;
>       ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
> !     for (int i = hq.size()-1; i >= 0; i--)
>         scoreDocs[i] = (ScoreDoc)hq.pop();
>   
> !     return new TopDocs(mc.totalHits, scoreDocs);
>     }
>   
>   
> ***************
> *** 201,207 ****
>   
>       }
>     }
> !   
>     public Query rewrite(Query original) throws IOException {
>       Query[] queries = new Query[searchables.length];
>       for (int i = 0; i < searchables.length; i++) {
> --- 224,230 ----
>   
>       }
>     }
> ! 
>     public Query rewrite(Query original) throws IOException {
>       Query[] queries = new Query[searchables.length];
>       for (int i = 0; i < searchables.length; i++) {
> 
> 
> ------------------------------------------------------------------------
> 
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
> For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message