lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Paul Elschot <paul.elsc...@xs4all.nl>
Subject FilteringQuery.java
Date Fri, 30 Jul 2004 21:17:50 GMT
Dear developers,

At the moment IndexSearcher.search(Query, Filter) computes a score
for every document matching the query before checking the filter.

With the BitSet.nextSetBit() method one might implement a
filter as a required clause in a Query. This would even allow the possible use of
ConjunctionScorer and skipTo() in appropriate circumstances (currently,
when all other clauses are required).

Below is a Query that intends to do this.
It compiles against current CVS, but it has not yet been tested.
Before I start writing test code I'd like to have some comments.

For very large indexes, and relatively small numbers of filtered docs,
a similar filter could be used with something sparser than a full BitSet,
e.g. a byte array of VInts with the differences between the document numbers.

Regards,
Paul.

Here it is, FilteringQuery.java, under Apache 2.0 licence:

package org.apache.lucene.search;

import java.util.BitSet;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;

public abstract class FilteringQuery extends Query {
  Filter filter;
  String filterName;
  
  public FilteringQuery(Filter filter, String filterName) {
    this.filter = filter; /* should be non null */
    this.filterName = filterName; /* for explanations */
  }
  
  protected String getFilterExplanation() {
    return (filterName != null) ? filterName : filter.toString();
  }
  
  /** Prints this <code>FilteringQuery</code> to a <code>String</code>.
   * @param field Should be null because a FilteringQuery depends on a filter.
   */
  public String toString(String field) {
    String res = "FilteringQuery( " + getFilterExplanation() + ")";
    if (field == null)
      return res;
    else
      return res + "(" + field + " ?)";
  }
  
  /** Prints this query to a string. */
  public String toString() { return toString(null); }
  
  /** Expert:
   * @return <code>null</code>. No similarity is used for scoring a </code>FilteringQuery</code>.
   */
  public Similarity getSimilarity(Searcher searcher) {return null;}

  /** Expert: Apply the Filter and use the result in another Query which
   * extends BooleanQuery to have ConjunctionScorer used when it is Query is required.
   */
  public Query rewrite(IndexReader reader) throws IOException {
    
    class SkipReaderBitsQuery extends Query {
      /** Prints this to a <code>String</code>.
       * @param  field Should be null.
       */
      public String toString(String field) {
        String res = "SkipReaderBitsQuery( " + getFilterExplanation() + ")";
        if (field == null)
          return res;
        else
          return res + "(" + field + " ?)";
      }
      
      /** Expert: Constructs a Weight implementation for this <code>SkipReaderBitsQuery</code>.
       * <p>Only implemented by primitive queries, which re-write to themselves.
       */
      protected Weight createWeight(final Searcher searcher) {
        
        class FilterWeight implements Weight {
          public float getValue() {return 0.0f;}

          public void normalize(float norm) {}

          public float sumOfSquaredWeights() {return 0.0f;}

          public Query getQuery() {return FilteringQuery.this;}

          public Explanation explain(IndexReader reader, int doc) {
            return new Explanation(getValue(), "weightless " + getFilterExplanation());
          }

          public Scorer scorer(final IndexReader reader) throws IOException {

            class SkipReaderBitsScorer extends Scorer {
              BitSet docNrs;
              int currentDoc;
              
              FilterReaderBitsScorer(Similarity similarity) throws IOException {
                super(similarity);
                /* CHECKME: ok not to compute the bits earlier? */
                docNrs = FilteringQuery.this.filter.bits(reader);
                currentDoc = -1;
              }
              
              public int doc() {return currentDoc;}

              public float score() {return 0.0f;}
              
              /* should not be called after returning false */
              public boolean next() {
                currentDoc = docNrs.nextSetBit(currentDoc + 1); /* -1 when no next bit */
                return currentDoc >= 0;
              }
              
              /* should not be called after returning false */
              public boolean skipTo(int target) {
                currentDoc = docNrs.nextSetBit((currentDoc < target) ? target : (currentDoc
+ 1));
                return currentDoc >= 0;
              }
              
              public Explanation explain(int doc) {
                skipTo(doc);
                return new Explanation(score() /* zero anyway */,
                                        "document " + doc + " "
                                        + ((currentDoc == doc) 
                                            ? "matches"
                                            : "does not match"
                                          )
                                        + " filter: " + getFilterExplanation());
              }
            }

            return new SkipReaderBitsScorer(getSimilarity(searcher));
          }
        }
        
        return new FilterWeight();
      }
    }
    
    return new SkipReaderBitsQuery();
  }
}


---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message