lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject cvs commit: jakarta-lucene/src/java/org/apache/lucene/search HitCollector.java IndexSearcher.java MultiSearcher.java Searcher.java
Date Tue, 25 Sep 2001 19:03:35 GMT
cutting     01/09/25 12:03:35

  Modified:    src/java/org/apache/lucene/search HitCollector.java
                        IndexSearcher.java MultiSearcher.java Searcher.java
  Log:
  Incorporated Joanne Sproston's changes to extend lower-level HitCollector-based search API to MultiSearcher.  I have not yet tested this.
  
  Revision  Changes    Path
  1.2       +8 -2      jakarta-lucene/src/java/org/apache/lucene/search/HitCollector.java
  
  Index: HitCollector.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/HitCollector.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- HitCollector.java	2001/09/18 16:29:56	1.1
  +++ HitCollector.java	2001/09/25 19:03:35	1.2
  @@ -55,7 +55,7 @@
    */
   
   /** Lower-level search API.
  - * @see IndexSearcher#search(Query,HitCollector)
  + * @see Searcher#search(Query,HitCollector)
    */
   public abstract class HitCollector {
     /** Called once for every non-zero scoring document, with the document number
  @@ -71,6 +71,12 @@
      *       }
      *     });
      * </pre>
  -   */
  +   *
  +   * <p>Note: This is called in an inner search loop.  For good search
  +   * performance, implementations of this method should not call {@link
  +   * Searcher#doc(int)} or {@link
  +   * org.apache.lucene.index.IndexReader#document(int)} on every document
  +   * number encountered.  Doing so can slow searches by an order of magnitude
  +   * or more. */
     public abstract void collect(int doc, float score);
   }
  
  
  
  1.2       +7 -15     jakarta-lucene/src/java/org/apache/lucene/search/IndexSearcher.java
  
  Index: IndexSearcher.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/IndexSearcher.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- IndexSearcher.java	2001/09/18 16:29:57	1.1
  +++ IndexSearcher.java	2001/09/25 19:03:35	1.2
  @@ -91,7 +91,8 @@
       return reader.docFreq(term);
     }
   
  -  final Document doc(int i) throws IOException {
  +  /** For use by {@link HitCollector} implementations. */
  +  public final Document doc(int i) throws IOException {
       return reader.document(i);
     }
   
  @@ -140,21 +141,12 @@
     * <p>Applications should only use this if they need <it>all</it> of the
      * matching documents.  The high-level search API ({@link
      * Searcher#search(Query)}) is usually more efficient, as it skips
  -   * non-high-scoring hits.  */
  -  public final void search(Query query, HitCollector results)
  -      throws IOException {
  -    search(query, null, results);
  -  }
  -
  -  /** Lower-level search API.
  +   * non-high-scoring hits.
      *
  -   * <p>{@link HitCollector#collect(int,float)} is called for every non-zero
  -   * scoring document.
  -   *
  -   * <p>Applications should only use this if they need <it>all</it> of the
  -   * matching documents.  The high-level search API ({@link
  -   * Searcher#search(Query)}) is usually more efficient, as it skips
  -   * non-high-scoring hits.  */
  +   * @param query to match documents
  +   * @param filter if non-null, a bitset used to eliminate some documents
  +   * @param results to receive hits
  +   */
     public final void search(Query query, Filter filter,
   			   final HitCollector results) throws IOException {
       HitCollector collector = results;
  
  
  
  1.2       +37 -3     jakarta-lucene/src/java/org/apache/lucene/search/MultiSearcher.java
  
  Index: MultiSearcher.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/MultiSearcher.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- MultiSearcher.java	2001/09/18 16:29:57	1.1
  +++ MultiSearcher.java	2001/09/25 19:03:35	1.2
  @@ -92,13 +92,16 @@
       return docFreq;
     }
   
  -  final Document doc(int n) throws IOException {
  +  /** For use by {@link HitCollector} implementations. */
  +  public final Document doc(int n) throws IOException {
       int i = searcherIndex(n);			  // find searcher index
       return searchers[i].doc(n - starts[i]);	  // dispatch to searcher
     }
   
  -  // replace w/ call to Arrays.binarySearch in Java 1.2
  -  private final int searcherIndex(int n) {	  // find searcher for doc n:
  +  /** For use by {@link HitCollector} implementations to identify the
  +   * index of the sub-searcher that a particular hit came from. */
  +  public final int searcherIndex(int n) {	  // find searcher for doc n:
  +    // replace w/ call to Arrays.binarySearch in Java 1.2
       int lo = 0;					  // search starts array
       int hi = searchers.length - 1;		  // for first element less
   						  // than n, return its index
  @@ -148,5 +151,36 @@
         scoreDocs[i] = (ScoreDoc)hq.pop();
       
       return new TopDocs(totalHits, scoreDocs);
  +  }
  +
  +
  +  /** Lower-level search API.
  +   *
  +   * <p>{@link HitCollector#collect(int,float)} is called for every non-zero
  +   * scoring document.
  +   *
  +   * <p>Applications should only use this if they need <it>all</it> of the
  +   * matching documents.  The high-level search API ({@link
  +   * Searcher#search(Query)}) is usually more efficient, as it skips
  +   * non-high-scoring hits.
  +   *
  +   * @param query to match documents
  +   * @param filter if non-null, a bitset used to eliminate some documents
  +   * @param results to receive hits
  +   */
  +  public final void search(Query query, Filter filter,
  +			   final HitCollector results)
  +    throws IOException {
  +    for (int i = 0; i < searchers.length; i++) {
  +      
  +      final int start = starts[i];
  +
  +      searchers[i].search(query, filter, new HitCollector() {
  +	  public void collect(int doc, float score) {
  +	    results.collect(doc + start, score);
  +	  }
  +	});
  +
  +    }
     }
   }
  
  
  
  1.2       +35 -3     jakarta-lucene/src/java/org/apache/lucene/search/Searcher.java
  
  Index: Searcher.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/Searcher.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- Searcher.java	2001/09/18 16:29:58	1.1
  +++ Searcher.java	2001/09/25 19:03:35	1.2
  @@ -66,15 +66,46 @@
   
     /** Returns the documents matching <code>query</code>. */
     public final Hits search(Query query) throws IOException {
  -    return search(query, null);
  +    return search(query, (Filter)null);
     }
   
     /** Returns the documents matching <code>query</code> and
       <code>filter</code>. */
  -  public final Hits search(Query query, Filter filter) throws IOException {
  +  public Hits search(Query query, Filter filter) throws IOException {
       return new Hits(this, query, filter);
     }
   
  +  /** Lower-level search API.
  +   *
  +   * <p>{@link HitCollector#collect(int,float)} is called for every non-zero
  +   * scoring document.
  +   *
  +   * <p>Applications should only use this if they need <it>all</it> of the
  +   * matching documents.  The high-level search API ({@link
  +   * Searcher#search(Query)}) is usually more efficient, as it skips
  +   * non-high-scoring hits.  */
  +  public void search(Query query, HitCollector results)
  +    throws IOException {
  +    search(query, (Filter)null, results);
  +  }    
  +
  +  /** Lower-level search API.
  +   *
  +   * <p>{@link HitCollector#collect(int,float)} is called for every non-zero
  +   * scoring document.
  +   *
  +   * <p>Applications should only use this if they need <it>all</it> of the
  +   * matching documents.  The high-level search API ({@link
  +   * Searcher#search(Query)}) is usually more efficient, as it skips
  +   * non-high-scoring hits.
  +   *
  +   * @param query to match documents
  +   * @param filter if non-null, a bitset used to eliminate some documents
  +   * @param results to receive hits
  +   */
  +  public abstract void search(Query query, Filter filter, HitCollector results)
  +    throws IOException;
  +
     /** Frees resources associated with this Searcher. */
     abstract public void close() throws IOException;
   
  @@ -82,6 +113,7 @@
     abstract int maxDoc() throws IOException;
     abstract TopDocs search(Query query, Filter filter, int n)
          throws IOException;
  -  abstract Document doc(int i) throws IOException;
   
  +  /** For use by {@link HitCollector} implementations. */
  +  public abstract Document doc(int i) throws IOException;
   }
  
  
  

Mime
View raw message