lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject cvs commit: jakarta-lucene/src/java/org/apache/lucene/search BooleanScorer.java Scorer.java TermScorer.java
Date Wed, 22 Sep 2004 17:56:03 GMT
cutting     2004/09/22 10:56:01

  Modified:    src/java/org/apache/lucene/search BooleanScorer.java
                        Scorer.java TermScorer.java
  Log:
  Optimize TermQuery within BooleanQuery, a common case.
  
  Revision  Changes    Path
  1.12      +66 -26    jakarta-lucene/src/java/org/apache/lucene/search/BooleanScorer.java
  
  Index: BooleanScorer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/BooleanScorer.java,v
  retrieving revision 1.11
  retrieving revision 1.12
  diff -u -r1.11 -r1.12
  --- BooleanScorer.java	16 Aug 2004 11:41:22 -0000	1.11
  +++ BooleanScorer.java	22 Sep 2004 17:56:00 -0000	1.12
  @@ -42,7 +42,7 @@
       public SubScorer next;
   
       public SubScorer(Scorer scorer, boolean required, boolean prohibited,
  -		     HitCollector collector, SubScorer next)
  +                     HitCollector collector, SubScorer next)
         throws IOException {
         this.scorer = scorer;
         this.done = !scorer.next();
  @@ -58,8 +58,8 @@
       int mask = 0;
       if (required || prohibited) {
         if (nextMask == 0)
  -	throw new IndexOutOfBoundsException
  -	  ("More than 32 required/prohibited clauses in query.");
  +        throw new IndexOutOfBoundsException
  +          ("More than 32 required/prohibited clauses in query.");
         mask = nextMask;
         nextMask = nextMask << 1;
       } else
  @@ -69,12 +69,12 @@
         maxCoord++;
   
       if (prohibited)
  -      prohibitedMask |= mask;			  // update prohibited mask
  +      prohibitedMask |= mask;                     // update prohibited mask
       else if (required)
  -      requiredMask |= mask;			  // update required mask
  +      requiredMask |= mask;                       // update required mask
   
       scorers = new SubScorer(scorer, required, prohibited,
  -			    bucketTable.newCollector(mask), scorers);
  +                            bucketTable.newCollector(mask), scorers);
     }
   
     private final void computeCoordFactors() {
  @@ -86,6 +86,46 @@
     private int end;
     private Bucket current;
   
  +  public void score(HitCollector hc) throws IOException {
  +    score(hc, Integer.MAX_VALUE);
  +  }
  +
  +  protected boolean score(HitCollector hc, int max) throws IOException {
  +    if (coordFactors == null)
  +      computeCoordFactors();
  +
  +    boolean more;
  +    do {
  +      while (bucketTable.first != null) {         // more queued
  +        current = bucketTable.first;
  +        if (current.doc >= max)
  +          return true;
  +
  +        // check prohibited & required
  +        if ((current.bits & prohibitedMask) == 0 && 
  +            (current.bits & requiredMask) == requiredMask) {
  +          hc.collect(current.doc, current.score * coordFactors[current.coord]);
  +        }
  +        
  +        bucketTable.first = current.next;         // pop the queue
  +      }
  +
  +      // refill the queue
  +      more = false;
  +      end += BucketTable.SIZE;
  +      for (SubScorer sub = scorers; sub != null; sub = sub.next) {
  +        if (!sub.done) {
  +          sub.done = !sub.scorer.score(sub.collector, end);
  +          if (!sub.done)
  +            more = true;
  +        }
  +      }
  +    } while (bucketTable.first != null || more);
  +
  +    return false;
  +  }
  +
  +
     public int doc() { return current.doc; }
   
     public boolean next() throws IOException {
  @@ -127,20 +167,20 @@
     }
   
     static final class Bucket {
  -    int	doc = -1;				  // tells if bucket is valid
  -    float	score;				  // incremental score
  -    int	bits;					  // used for bool constraints
  -    int	coord;					  // count of terms in score
  -    Bucket 	next;				  // next valid bucket
  +    int doc = -1;                                 // tells if bucket is valid
  +    float       score;                            // incremental score
  +    int bits;                                     // used for bool constraints
  +    int coord;                                    // count of terms in score
  +    Bucket      next;                             // next valid bucket
     }
   
     /** A simple hash table of document scores within a range. */
     static final class BucketTable {
  -    public static final int SIZE = 1 << 10;
  +    public static final int SIZE = 1 << 8;
       public static final int MASK = SIZE - 1;
   
       final Bucket[] buckets = new Bucket[SIZE];
  -    Bucket first = null;			  // head of valid list
  +    Bucket first = null;                          // head of valid list
     
       private BooleanScorer scorer;
   
  @@ -167,20 +207,20 @@
         final int i = doc & BucketTable.MASK;
         Bucket bucket = table.buckets[i];
         if (bucket == null)
  -	table.buckets[i] = bucket = new Bucket();
  +        table.buckets[i] = bucket = new Bucket();
         
  -      if (bucket.doc != doc) {			  // invalid bucket
  -	bucket.doc = doc;			  // set doc
  -	bucket.score = score;			  // initialize score
  -	bucket.bits = mask;			  // initialize mask
  -	bucket.coord = 1;			  // initialize coord
  -
  -	bucket.next = table.first;		  // push onto valid list
  -	table.first = bucket;
  -      } else {					  // valid bucket
  -	bucket.score += score;			  // increment score
  -	bucket.bits |= mask;			  // add bits in mask
  -	bucket.coord++;				  // increment coord
  +      if (bucket.doc != doc) {                    // invalid bucket
  +        bucket.doc = doc;                         // set doc
  +        bucket.score = score;                     // initialize score
  +        bucket.bits = mask;                       // initialize mask
  +        bucket.coord = 1;                         // initialize coord
  +
  +        bucket.next = table.first;                // push onto valid list
  +        table.first = bucket;
  +      } else {                                    // valid bucket
  +        bucket.score += score;                    // increment score
  +        bucket.bits |= mask;                      // add bits in mask
  +        bucket.coord++;                           // increment coord
         }
       }
     }
  
  
  
  1.7       +17 -0     jakarta-lucene/src/java/org/apache/lucene/search/Scorer.java
  
  Index: Scorer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/Scorer.java,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- Scorer.java	17 Aug 2004 20:38:45 -0000	1.6
  +++ Scorer.java	22 Sep 2004 17:56:00 -0000	1.7
  @@ -48,6 +48,23 @@
       }
     }
   
  +  /** Expert: Collects matching documents in a range.  Hook for optimization.
  +   * Note that {@link #next()} must be called once before this method is called
  +   * for the first time.
  +   * @param hc The collector to which all matching documents are passed through
  +   * {@link HitCollector#collect(int, float)}.
  +   * @param max Do not score documents past this.
  +   * @return true if more matching documents may remain.
  +   */
  +  protected boolean score(HitCollector hc, int max) throws IOException {
  +    while (doc() < max) {
  +      hc.collect(doc(), score());
  +      if (!next())
  +        return false;
  +    }
  +    return true;
  +  }
  +
     /** Advances to the next document matching the query.
      * @return true iff there is another document matching the query.
      */
  
  
  
  1.13      +38 -5     jakarta-lucene/src/java/org/apache/lucene/search/TermScorer.java
  
  Index: TermScorer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/TermScorer.java,v
  retrieving revision 1.12
  retrieving revision 1.13
  diff -u -r1.12 -r1.13
  --- TermScorer.java	17 Aug 2004 20:38:45 -0000	1.12
  +++ TermScorer.java	22 Sep 2004 17:56:00 -0000	1.13
  @@ -29,8 +29,8 @@
     private float weightValue;
     private int doc;
   
  -  private final int[] docs = new int[32];	  // buffered doc numbers
  -  private final int[] freqs = new int[32];	  // buffered term freqs
  +  private final int[] docs = new int[32];         // buffered doc numbers
  +  private final int[] freqs = new int[32];        // buffered term freqs
     private int pointer;
     private int pointerMax;
   
  @@ -55,6 +55,39 @@
         scoreCache[i] = getSimilarity().tf(i) * weightValue;
     }
   
  +  public void score(HitCollector hc) throws IOException {
  +    next();
  +    score(hc, Integer.MAX_VALUE);
  +  }
  +
  +  protected boolean score(HitCollector c, int end) throws IOException {
  +    Similarity similarity = getSimilarity();      // cache sim in local
  +    while (doc < end) {                           // for docs in window
  +      int f = freqs[pointer];
  +      float score =                               // compute tf(f)*weight
  +        f < SCORE_CACHE_SIZE                      // check cache
  +         ? scoreCache[f]                          // cache hit
  +         : similarity.tf(f)*weightValue;          // cache miss
  +
  +      score *= Similarity.decodeNorm(norms[doc]); // normalize for field
  +
  +      c.collect(doc, score);                      // collect score
  +
  +      if (++pointer >= pointerMax) {
  +        pointerMax = termDocs.read(docs, freqs);  // refill buffers
  +        if (pointerMax != 0) {
  +          pointer = 0;
  +        } else {
  +          termDocs.close();                       // close stream
  +          doc = Integer.MAX_VALUE;                // set to sentinel value
  +          return false;
  +        }
  +      } 
  +      doc = docs[pointer];
  +    }
  +    return true;
  +  }
  +
     /** Returns the current document number matching the query.
      * Initially invalid, until {@link #next()} is called the first time.
      */
  @@ -72,8 +105,8 @@
         if (pointerMax != 0) {
           pointer = 0;
         } else {
  -        termDocs.close();			  // close stream
  -        doc = Integer.MAX_VALUE;		  // set to sentinel value
  +        termDocs.close();                         // close stream
  +        doc = Integer.MAX_VALUE;                  // set to sentinel value
           return false;
         }
       } 
  @@ -84,7 +117,7 @@
     public float score() {
       int f = freqs[pointer];
       float raw =                                   // compute tf(f)*weight
  -      f < SCORE_CACHE_SIZE			  // check cache
  +      f < SCORE_CACHE_SIZE                        // check cache
         ? scoreCache[f]                             // cache hit
         : getSimilarity().tf(f)*weightValue;        // cache miss
   
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message