lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject cvs commit: jakarta-lucene/src/test/org/apache/lucene/search TestBasics.java TestSimilarity.java
Date Fri, 30 Jan 2004 22:10:00 GMT
cutting     2004/01/30 14:10:00

  Modified:    .        CHANGES.txt
               src/java/org/apache/lucene/search Similarity.java
               src/test/org/apache/lucene/search TestBasics.java
                        TestSimilarity.java
  Added:       src/java/org/apache/lucene/search/spans NearSpans.java
                        SpanFirstQuery.java SpanNearQuery.java
                        SpanNotQuery.java SpanOrQuery.java SpanQuery.java
                        SpanQueue.java SpanScorer.java SpanTermQuery.java
                        SpanWeight.java Spans.java package.html
  Log:
  Added new span-based query API.
  
  Revision  Changes    Path
  1.69      +4 -1      jakarta-lucene/CHANGES.txt
  
  Index: CHANGES.txt
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/CHANGES.txt,v
  retrieving revision 1.68
  retrieving revision 1.69
  diff -u -r1.68 -r1.69
  --- CHANGES.txt	30 Jan 2004 17:07:53 -0000	1.68
  +++ CHANGES.txt	30 Jan 2004 22:09:59 -0000	1.69
  @@ -42,6 +42,9 @@
    6. Changed FSDirectory to auto-create a full directory tree that it
       needs by using mkdirs() instead of mkdir().  (Mladen Turk via Otis)
   
  + 7. Added a new span-based query API.  This implements, among other
  +    things, nested phrases.  See javadocs for details.  (Doug Cutting)
  +
   
   1.3 final
   
  
  
  
  1.11      +8 -5      jakarta-lucene/src/java/org/apache/lucene/search/Similarity.java
  
  Index: Similarity.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/Similarity.java,v
  retrieving revision 1.10
  retrieving revision 1.11
  diff -u -r1.10 -r1.11
  --- Similarity.java	17 Oct 2003 17:15:57 -0000	1.10
  +++ Similarity.java	30 Jan 2004 22:10:00 -0000	1.11
  @@ -55,7 +55,9 @@
    */
   
   import java.io.IOException;
  -import java.util.Vector;
  +
  +import java.util.Collection;
  +import java.util.Iterator;
   
   import org.apache.lucene.index.Term;
   
  @@ -296,14 +298,15 @@
      * <p>The default implementation sums the {@link #idf(Term,Searcher)} factor
      * for each term in the phrase.
      *
  -   * @param terms the vector of terms in the phrase
  +   * @param terms the terms in the phrase
      * @param searcher the document collection being searched
      * @return a score factor for the phrase
      */
  -  public float idf(Vector terms, Searcher searcher) throws IOException {
  +  public float idf(Collection terms, Searcher searcher) throws IOException {
       float idf = 0.0f;
  -    for (int i = 0; i < terms.size(); i++) {
  -      idf += idf((Term)terms.elementAt(i), searcher);
  +    Iterator i = terms.iterator();
  +    while (i.hasNext()) {
  +      idf += idf((Term)i.next(), searcher);
       }
       return idf;
     }
  
  
  
  1.1                  jakarta-lucene/src/java/org/apache/lucene/search/spans/NearSpans.java
  
  Index: NearSpans.java
  ===================================================================
  package org.apache.lucene.search.spans;
  
  import java.io.IOException;
  
  import java.util.List;
  import java.util.ArrayList;
  import java.util.Iterator;
  
  import org.apache.lucene.index.IndexReader;
  
  /**
   * Enumerates the matches of a {@link SpanNearQuery}: documents in which every
   * clause has a span, where the gap between those spans is at most
   * <code>slop</code> and, when <code>inOrder</code> is set, the clause spans
   * start in clause order.
   *
   * <p>Each clause's spans are wrapped in a {@link SpansCell}.  The cells are
   * kept both in a linked list (used to bring all clauses onto one document)
   * and in a {@link SpanQueue} (used to find the minimum span); the
   * <code>listStale</code> / <code>queueStale</code> flags record which of the
   * two structures must be rebuilt from the other before it is used.
   */
  class NearSpans implements Spans {
    private SpanNearQuery query;

    private List ordered = new ArrayList();         // spans in query order
    private int slop;                               // from query
    private boolean inOrder;                        // from query

    private SpansCell first;                        // linked list of spans
    private SpansCell last;                         // sorted by doc only

    private int totalLength;                        // sum of current lengths

    private SpanQueue queue;                        // sorted queue of spans
    private SpansCell max;                          // max element in queue

    private boolean more = true;                    // true iff not done
    private boolean firstTime = true;               // true before first next()

    private boolean queueStale = false;             // true if queue not sorted
    private boolean listStale = true;               // true if list not sorted

    /**
     * Wraps a Spans, and can be used to form a linked list.  Every move of the
     * wrapped spans also updates the enclosing instance's
     * <code>totalLength</code> and <code>max</code>.
     */
    private class SpansCell implements Spans {
      private Spans spans;
      private SpansCell next;
      private int length = -1;                      // -1 until first positioned

      public SpansCell(Spans spans) { this.spans = spans; }

      /** Moves the wrapped spans to its next match, maintaining the totals. */
      public boolean next() throws IOException {
        if (length != -1)                           // subtract old length
          totalLength -= length;

        boolean more = spans.next();                // move to next

        if (more) {
          length = end() - start();                 // compute new length
          totalLength += length;                    // add new length to total

          if (max == null || doc() > max.doc() ||   // maintain max
              (doc() == max.doc() && end() > max.end()))
            max = this;
        }

        return more;
      }

      /** Skips the wrapped spans to <code>target</code>, maintaining the totals. */
      public boolean skipTo(int target) throws IOException {
        if (length != -1)                           // subtract old length
          totalLength -= length;

        boolean more = spans.skipTo(target);        // skip

        if (more) {
          length = end() - start();                 // compute new length
          totalLength += length;                    // add new length to total

          if (max == null || doc() > max.doc() ||   // maintain max
              (doc() == max.doc() && end() > max.end()))
            max = this;
        }

        return more;
      }

      public int doc() { return spans.doc(); }
      public int start() { return spans.start(); }
      public int end() { return spans.end(); }

      public String toString() { return spans.toString(); }
    }

    /**
     * Creates the spans for <code>query</code>, obtaining one wrapped
     * {@link Spans} per clause from <code>reader</code>.
     */
    public NearSpans(SpanNearQuery query, IndexReader reader)
      throws IOException {
      this.query = query;
      this.slop = query.getSlop();
      this.inOrder = query.isInOrder();

      SpanQuery[] clauses = query.getClauses();     // initialize spans & list
      queue = new SpanQueue(clauses.length);
      for (int i = 0; i < clauses.length; i++) {
        SpansCell cell =                            // construct clause spans
          new SpansCell(clauses[i].getSpans(reader));
        ordered.add(cell);                          // add to ordered
      }
    }

    /**
     * Advances to the next match.
     *
     * @return true iff a match was found; once false is returned, further
     *         calls keep returning false without touching the clause spans
     */
    public boolean next() throws IOException {
      if (firstTime) {
        initList(true);
        listToQueue();                              // initialize queue
        firstTime = false;
      } else if (more) {                            // never advance once done:
        more = last.next();                         // trigger scan
        queueStale = true;
      }

      while (more) {

        if (listStale) {                            // maintain list
          queueToList();
          listStale = false;
        }

        // skip to doc w/ all clauses

        while (more && first.doc() < last.doc()) {
          more = first.skipTo(last.doc());          // skip first upto last
          firstToLast();                            // and move it to the end
          queueStale = true;
        }

        if (!more) return false;

        // found doc w/ all clauses

        if (queueStale) {                           // maintain the queue
          listToQueue();
          queueStale = false;
        }

        if (atMatch()) {                            // check slop and order
          return true;
        }

        more = min().next();                        // trigger further scanning

        if (more) {
          queue.adjustTop();                        // maintain queue
          if (min().doc() != max.doc()) {
            listStale = true;                       // maintain list
          }
        }
      }
      return false;                                 // no more matches
    }

    /**
     * Skips every clause to <code>target</code> and then positions on a match.
     * The position reached is tested with the same slop and order criteria
     * that {@link #next()} uses, so an in-order query never reports an
     * out-of-order match here.
     *
     * <p>NOTE(review): every clause is skipped unconditionally, including
     * clauses already positioned at or beyond <code>target</code> -- confirm
     * that this is the intended skipTo behaviour.
     *
     * @return true iff a match was found
     */
    public boolean skipTo(int target) throws IOException {
      if (firstTime) {
        initList(false);
        firstTime = false;
      }

      for (SpansCell cell = first; more && cell!=null; cell=cell.next) {
        more = cell.skipTo(target);
      }

      if (more) {
        listToQueue();
        listStale = true;

        if (min().doc() == max.doc() && atMatch()) { // at a match?
          return true;
        }
        return next();                              // no, scan
      }

      return false;
    }

    /**
     * Returns true iff the current clause positions satisfy the slop limit
     * and, for in-order queries, the ordering constraint.  The queue must be
     * up to date and all clauses must be on the same document.
     */
    private boolean atMatch() {
      int matchLength = max.end() - min().start();
      return ((matchLength - totalLength) <= slop)  // check slop
        && (!inOrder || matchIsOrdered());          // check order
    }

    /** Returns the least cell according to the queue's ordering. */
    private SpansCell min() { return (SpansCell)queue.top(); }

    public int doc() { return min().doc(); }
    public int start() { return min().start(); }
    public int end() { return max.end(); }

    public String toString() { return "spans(" + query.toString() + ")"; }

    /**
     * Builds the linked list from the clause cells in query order, optionally
     * moving each cell to its first entry.  Stops at the first cell that has
     * no entry, leaving <code>more</code> false.
     */
    private void initList(boolean next) throws IOException {
      for (int i = 0; more && i < ordered.size(); i++) {
        SpansCell cell = (SpansCell)ordered.get(i);
        if (next)
          more = cell.next();                       // move to first entry
        if (more) {
          addToList(cell);                          // add to list
        }
      }
    }

    /** Appends <code>cell</code> to the end of the linked list. */
    private void addToList(SpansCell cell) {
      if (last != null) {                           // add next to end of list
        last.next = cell;
      } else
        first = cell;
      last = cell;
      cell.next = null;
    }

    /** Moves the head of the linked list to its tail. */
    private void firstToLast() {
      last.next = first;                            // move first to end of list
      last = first;
      first = first.next;
      last.next = null;
    }

    /** Empties the queue into a fresh linked list, in the order popped. */
    private void queueToList() {
      last = first = null;
      while (queue.top() != null) {
        addToList((SpansCell)queue.pop());
      }
    }

    /** Rebuilds the queue from the cells currently in the linked list. */
    private void listToQueue() {
      queue.clear();
      for (SpansCell cell = first; cell != null; cell = cell.next) {
        queue.put(cell);                            // build queue from list
      }
    }

    /** Returns true iff clause start positions strictly increase in query order. */
    private boolean matchIsOrdered() {
      int lastStart = -1;
      for (int i = 0; i < ordered.size(); i++) {
        int start = ((SpansCell)ordered.get(i)).start();
        if (!(start > lastStart))
          return false;
        lastStart = start;
      }
      return true;
    }

  }
  
  
  
  1.1                  jakarta-lucene/src/java/org/apache/lucene/search/spans/SpanFirstQuery.java
  
  Index: SpanFirstQuery.java
  ===================================================================
  package org.apache.lucene.search.spans;
  
  import java.io.IOException;
  
  import java.util.Collection;
  
  import org.apache.lucene.index.IndexReader;
  
  /**
   * Restricts another span query to matches that finish near the start of a
   * field: a span of the wrapped query is kept only when its end position does
   * not exceed a fixed limit.
   */
  public class SpanFirstQuery extends SpanQuery {
    private SpanQuery match;
    private int end;

    /**
     * Builds a query that keeps the spans of <code>match</code> whose end
     * position is at most <code>end</code>.
     */
    public SpanFirstQuery(SpanQuery match, int end) {
      this.match = match;
      this.end = end;
    }

    /** Returns the query whose spans are being filtered. */
    public SpanQuery getMatch() { return match; }

    /** Returns the largest end position a span may have and still match. */
    public int getEnd() { return end; }

    /** The field is that of the wrapped query. */
    public String getField() { return match.getField(); }

    /** The terms are those of the wrapped query. */
    public Collection getTerms() { return match.getTerms(); }

    /** Renders as <code>spanFirst(&lt;match&gt;, &lt;end&gt;)</code>. */
    public String toString(String field) {
      return "spanFirst(" + match.toString(field) + ", " + end + ")";
    }

    /** Returns the wrapped query's spans, filtered by the end-position limit. */
    public Spans getSpans(final IndexReader reader) throws IOException {
      return new Spans() {
          private Spans inner = match.getSpans(reader);

          /** Advances the wrapped spans until one ends within the limit. */
          public boolean next() throws IOException {
            boolean found = false;
            while (!found && inner.next()) {
              found = inner.end() <= SpanFirstQuery.this.end;
            }
            return found;
          }

          /**
           * Skips the wrapped spans to <code>target</code>; if the span landed
           * on ends beyond the limit, keeps scanning with {@link #next()}.
           */
          public boolean skipTo(int target) throws IOException {
            return inner.skipTo(target)
              && (inner.end() <= SpanFirstQuery.this.end || next());
          }

          public int doc() { return inner.doc(); }
          public int start() { return inner.start(); }
          public int end() { return inner.end(); }

          public String toString() {
            return "spans(" + SpanFirstQuery.this.toString() + ")";
          }

        };
    }

  }
  
  
  
  1.1                  jakarta-lucene/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
  
  Index: SpanNearQuery.java
  ===================================================================
  package org.apache.lucene.search.spans;
  
  import java.io.IOException;
  
  import java.util.Collection;
  import java.util.List;
  import java.util.ArrayList;
  import java.util.Iterator;
  
  
  import org.apache.lucene.index.IndexReader;
  
  /**
   * Matches spans which lie close to each other.  The caller supplies the
   * <i>slop</i> -- the largest number of unmatched positions allowed between
   * the clause spans -- and states whether the clause spans have to occur in
   * clause order.
   */
  public class SpanNearQuery extends SpanQuery {
    private List clauses;
    private int slop;
    private boolean inOrder;

    private String field;

    /**
     * Builds a SpanNearQuery.  A match consists of one span from every clause,
     * with at most <code>slop</code> unmatched positions between them in
     * total.  When <code>inOrder</code> is true the spans must be ordered as
     * in <code>clauses</code>.
     *
     * @throws IllegalArgumentException if the clauses do not all share the
     *         field of the first clause
     */
    public SpanNearQuery(SpanQuery[] clauses, int slop, boolean inOrder) {
      this.slop = slop;
      this.inOrder = inOrder;

      // keep a private list copy of the caller's array
      this.clauses = new ArrayList(clauses.length);
      for (int i = 0; i < clauses.length; i++) {
        SpanQuery clause = clauses[i];
        String clauseField = clause.getField();
        if (i == 0) {                               // first clause fixes field
          field = clauseField;
        } else if (!clauseField.equals(field)) {
          throw new IllegalArgumentException("Clauses must have same field.");
        }
        this.clauses.add(clause);
      }
    }

    /** Returns a new array holding the clauses whose spans are matched. */
    public SpanQuery[] getClauses() {
      SpanQuery[] result = new SpanQuery[clauses.size()];
      return (SpanQuery[])clauses.toArray(result);
    }

    /** Returns the largest permitted number of intervening unmatched positions. */
    public int getSlop() { return slop; }

    /** Returns true if the clause spans must appear in clause order. */
    public boolean isInOrder() { return inOrder; }

    public String getField() { return field; }

    /** Collects the terms of every clause, in clause order. */
    public Collection getTerms() {
      Collection all = new ArrayList();
      for (int i = 0; i < clauses.size(); i++) {
        all.addAll(((SpanQuery)clauses.get(i)).getTerms());
      }
      return all;
    }

    /** Renders as <code>spanNear([c1, c2, ...], slop, inOrder)</code>. */
    public String toString(String field) {
      StringBuffer out = new StringBuffer("spanNear([");
      for (int i = 0; i < clauses.size(); i++) {
        if (i > 0) {
          out.append(", ");
        }
        out.append(((SpanQuery)clauses.get(i)).toString(field));
      }
      out.append("], ").append(slop).append(", ").append(inOrder).append(")");
      return out.toString();
    }

    /**
     * Returns the spans for this query.  With no clauses the work is handed to
     * an empty {@link SpanOrQuery}; with a single clause that clause's own
     * spans are returned; otherwise a {@link NearSpans} is created.
     */
    public Spans getSpans(final IndexReader reader) throws IOException {
      switch (clauses.size()) {
        case 0:                                     // optimize 0-clause case
          return new SpanOrQuery(getClauses()).getSpans(reader);
        case 1:                                     // optimize 1-clause case
          return ((SpanQuery)clauses.get(0)).getSpans(reader);
        default:
          return new NearSpans(this, reader);
      }
    }

  }
  
  
  
  1.1                  jakarta-lucene/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
  
  Index: SpanNotQuery.java
  ===================================================================
  package org.apache.lucene.search.spans;
  
  import java.io.IOException;
  
  import java.util.Collection;
  
  import org.apache.lucene.index.IndexReader;
  
  /** Removes matches which overlap with another SpanQuery. */
  public class SpanNotQuery extends SpanQuery {
    private SpanQuery include;
    private SpanQuery exclude;

    /**
     * Construct a SpanNotQuery matching spans from <code>include</code> which
     * have no overlap with spans from <code>exclude</code>.
     *
     * @throws IllegalArgumentException if the two queries are not on the same
     *         field
     */
    public SpanNotQuery(SpanQuery include, SpanQuery exclude) {
      this.include = include;
      this.exclude = exclude;

      if (!include.getField().equals(exclude.getField()))
        throw new IllegalArgumentException("Clauses must have same field.");
    }

    /** Return the SpanQuery whose matches are filtered. */
    public SpanQuery getInclude() { return include; }

    /** Return the SpanQuery whose matches must not overlap those returned. */
    public SpanQuery getExclude() { return exclude; }

    public String getField() { return include.getField(); }

    /** Only the terms of the include query are reported. */
    public Collection getTerms() { return include.getTerms(); }

    public String toString(String field) {
      StringBuffer buffer = new StringBuffer();
      buffer.append("spanNot(");
      buffer.append(include.toString(field));
      buffer.append(", ");
      buffer.append(exclude.toString(field));
      buffer.append(")");
      return buffer.toString();
    }


    /**
     * Returns the include query's spans with every span that overlaps a span
     * of the exclude query (in the same document) removed.
     */
    public Spans getSpans(final IndexReader reader) throws IOException {
      return new Spans() {
          private Spans includeSpans = include.getSpans(reader);
          private boolean moreInclude = true;

          // The exclude spans are moved onto their first match right away:
          // next() and skipTo() read excludeSpans.doc()/start()/end() before
          // they ever advance it, and those values are only meaningful once
          // the spans have been positioned.
          private Spans excludeSpans = exclude.getSpans(reader);
          private boolean moreExclude = excludeSpans.next();

          /** Advances to the next include span that no exclude span overlaps. */
          public boolean next() throws IOException {
            if (moreInclude)                        // move to next include
              moreInclude = includeSpans.next();

            while (moreInclude && moreExclude) {

              if (includeSpans.doc() > excludeSpans.doc()) // skip exclude
                moreExclude = excludeSpans.skipTo(includeSpans.doc());

              while (moreExclude                    // while exclude is before
                     && includeSpans.doc() == excludeSpans.doc()
                     && excludeSpans.end() <= includeSpans.start()) {
                moreExclude = excludeSpans.next();  // increment exclude
              }

              if (!moreExclude                      // if no intersection
                  || includeSpans.doc() != excludeSpans.doc()
                  || includeSpans.end() <= excludeSpans.start())
                break;                              // we found a match

              moreInclude = includeSpans.next();    // intersected: keep scanning
            }
            return moreInclude;
          }

          /**
           * Skips the include spans to <code>target</code>, brings the exclude
           * spans up to the same document, and falls back to {@link #next()}
           * when the include span landed on is overlapped.
           */
          public boolean skipTo(int target) throws IOException {
            if (moreInclude)                        // skip include
              moreInclude = includeSpans.skipTo(target);

            if (!moreInclude)
              return false;

            if (moreExclude                         // skip exclude
                && includeSpans.doc() > excludeSpans.doc())
              moreExclude = excludeSpans.skipTo(includeSpans.doc());

            while (moreExclude                      // while exclude is before
                   && includeSpans.doc() == excludeSpans.doc()
                   && excludeSpans.end() <= includeSpans.start()) {
              moreExclude = excludeSpans.next();    // increment exclude
            }

            if (!moreExclude                        // if no intersection
                  || includeSpans.doc() != excludeSpans.doc()
                  || includeSpans.end() <= excludeSpans.start())
              return true;                          // we found a match

            return next();                          // scan to next match
          }

          public int doc() { return includeSpans.doc(); }
          public int start() { return includeSpans.start(); }
          public int end() { return includeSpans.end(); }

          public String toString() {
            return "spans(" + SpanNotQuery.this.toString() + ")";
          }

        };
    }

  }
  
  
  
  1.1                  jakarta-lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
  
  Index: SpanOrQuery.java
  ===================================================================
  package org.apache.lucene.search.spans;
  
  import java.io.IOException;
  
  import java.util.List;
  import java.util.Collection;
  import java.util.ArrayList;
  import java.util.Iterator;
  
  import org.apache.lucene.index.IndexReader;
  
  /** Matches the union of its clauses.*/
  public class SpanOrQuery extends SpanQuery {
    private List clauses;
    private String field;

    /**
     * Construct a SpanOrQuery merging the provided clauses.
     *
     * @throws IllegalArgumentException if the clauses do not all share the
     *         field of the first clause
     */
    public SpanOrQuery(SpanQuery[] clauses) {

      // copy clauses array into an ArrayList
      this.clauses = new ArrayList(clauses.length);
      for (int i = 0; i < clauses.length; i++) {
        SpanQuery clause = clauses[i];
        if (i == 0) {                               // check field
          field = clause.getField();
        } else if (!clause.getField().equals(field)) {
          throw new IllegalArgumentException("Clauses must have same field.");
        }
        this.clauses.add(clause);
      }
    }

    /** Return the clauses whose spans are matched. */
    public SpanQuery[] getClauses() {
      return (SpanQuery[])clauses.toArray(new SpanQuery[clauses.size()]);
    }

    /** Returns the field of the first clause, or null when there are no clauses. */
    public String getField() { return field; }

    /** Collects the terms of every clause, in clause order. */
    public Collection getTerms() {
      Collection terms = new ArrayList();
      Iterator i = clauses.iterator();
      while (i.hasNext()) {
        SpanQuery clause = (SpanQuery)i.next();
        terms.addAll(clause.getTerms());
      }
      return terms;
    }

    public String toString(String field) {
      StringBuffer buffer = new StringBuffer();
      buffer.append("spanOr([");
      Iterator i = clauses.iterator();
      while (i.hasNext()) {
        SpanQuery clause = (SpanQuery)i.next();
        buffer.append(clause.toString(field));
        if (i.hasNext()) {
          buffer.append(", ");
        }
      }
      buffer.append("])");
      return buffer.toString();
    }

    /**
     * Returns spans that merge the spans of all clauses in the order defined
     * by {@link SpanQueue}.  A single clause is returned directly.
     */
    public Spans getSpans(final IndexReader reader) throws IOException {
      if (clauses.size() == 1)                      // optimize 1-clause case
        return ((SpanQuery)clauses.get(0)).getSpans(reader);

      return new Spans() {
          private List all = new ArrayList(clauses.size());
          private SpanQueue queue = new SpanQueue(clauses.size());

          {
            Iterator i = clauses.iterator();
            while (i.hasNext()) {                   // initialize all
              all.add(((SpanQuery)i.next()).getSpans(reader));
            }
          }

          private boolean firstTime = true;         // true until queue is built

          /** Moves to the next span of the union. */
          public boolean next() throws IOException {
            if (firstTime) {                        // first time -- initialize
              for (int i = 0; i < all.size(); i++) {
                Spans spans = (Spans)all.get(i);
                if (spans.next()) {                 // move to first entry
                  queue.put(spans);                 // build queue
                }
              }
              firstTime = false;
              return queue.size() != 0;
            }

            if (queue.size() == 0) {                // all done
              return false;
            }

            if (top().next()) {                     // move to next
              queue.adjustTop();
              return true;
            }

            queue.pop();                            // exhausted a clause
            return queue.size() != 0;
          }

          private Spans top() { return (Spans)queue.top(); }

          /**
           * Positions on the first span whose document is at least
           * <code>target</code>.  Before the queue has been built, every
           * clause is skipped and the queue is built from the survivors.
           * Afterwards only clauses still in the queue and positioned before
           * <code>target</code> are advanced: a clause that is already at or
           * beyond <code>target</code> keeps its current span, and a clause
           * that was exhausted and removed from the queue is never touched
           * again.
           */
          public boolean skipTo(int target) throws IOException {
            if (firstTime) {                        // first time -- initialize
              for (int i = 0; i < all.size(); i++) {
                Spans spans = (Spans)all.get(i);
                if (spans.skipTo(target)) {         // skip each spans in all
                  queue.put(spans);                 // build queue
                }
              }
              firstTime = false;
              return queue.size() != 0;
            }

            while (queue.size() != 0 && top().doc() < target) {
              if (top().skipTo(target)) {           // advance the laggard
                queue.adjustTop();
              } else {
                queue.pop();                        // exhausted a clause
              }
            }
            return queue.size() != 0;
          }

          public int doc() { return top().doc(); }
          public int start() { return top().start(); }
          public int end() { return top().end(); }

          public String toString() {
            return "spans(" + SpanOrQuery.this.toString() + ")";
          }

        };
    }

  }
  
  
  
  1.1                  jakarta-lucene/src/java/org/apache/lucene/search/spans/SpanQuery.java
  
  Index: SpanQuery.java
  ===================================================================
  package org.apache.lucene.search.spans;
  
  import java.io.IOException;
  
  import java.util.Collection;
  
  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.search.Query;
  import org.apache.lucene.search.Weight;
  import org.apache.lucene.search.Searcher;
  
  /**
   * Common superclass of the span-based queries.  A subclass supplies its
   * spans, the field it matches and the terms it matches; weighting is
   * provided here through {@link SpanWeight}.
   */
  public abstract class SpanQuery extends Query {

    /**
     * Expert: produces this query's matches within the given index.  Used
     * internally when searching for spans.
     */
    public abstract Spans getSpans(IndexReader reader) throws IOException;

    /** Gives the name of the field this query matches. */
    public abstract String getField();

    /** Gives a collection holding every term this query matches. */
    public abstract Collection getTerms();

    /** Creates the {@link SpanWeight} for this query and the given searcher. */
    protected Weight createWeight(Searcher searcher) {
      Weight weight = new SpanWeight(this, searcher);
      return weight;
    }

  }
  
  
  
  
  1.1                  jakarta-lucene/src/java/org/apache/lucene/search/spans/SpanQueue.java
  
  Index: SpanQueue.java
  ===================================================================
  package org.apache.lucene.search.spans;
  
  import org.apache.lucene.util.PriorityQueue;
  
  /**
   * Priority queue of {@link Spans}, ordered by document, then by start
   * position, then by end position.
   */
  class SpanQueue extends PriorityQueue {

    /** Creates a queue initialized for <code>size</code> elements. */
    public SpanQueue(int size) {
      initialize(size);
    }

    /**
     * Compares two spans: the lower document comes first; within a document
     * the lower start; for equal starts the lower end.
     */
    protected final boolean lessThan(Object o1, Object o2) {
      Spans left = (Spans)o1;
      Spans right = (Spans)o2;

      if (left.doc() != right.doc()) {              // different documents
        return left.doc() < right.doc();
      }
      if (left.start() != right.start()) {          // same doc, other start
        return left.start() < right.start();
      }
      return left.end() < right.end();              // tie broken by end
    }
  }
  
  
  
  1.1                  jakarta-lucene/src/java/org/apache/lucene/search/spans/SpanScorer.java
  
  Index: SpanScorer.java
  ===================================================================
  package org.apache.lucene.search.spans;
  
  import java.io.IOException;
  
  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.search.Weight;
  import org.apache.lucene.search.Searcher;
  import org.apache.lucene.search.Scorer;
  import org.apache.lucene.search.Explanation;
  import org.apache.lucene.search.Similarity;
  
  
  /**
   * Scores documents from a {@link Spans} enumeration.  All spans of one
   * document are folded into a single frequency by summing
   * <code>sloppyFreq</code> of each span's length; the score is
   * <code>tf(freq)</code> times the weight's value times the decoded norm.
   */
  class SpanScorer extends Scorer {
    private Spans spans;
    private Weight weight;
    private byte[] norms;
    private float value;

    private boolean firstTime = true;               // spans not yet positioned
    private boolean more = true;                    // spans not yet exhausted

    private int doc;                                // document being scored
    private float freq;                             // its accumulated frequency

    /**
     * @param spans the matches to score
     * @param weight source of the score factor, read once via getValue()
     * @param similarity used for sloppyFreq() and tf()
     * @param norms encoded norms, indexed by document number
     */
    SpanScorer(Spans spans, Weight weight, Similarity similarity, byte[] norms)
      throws IOException {
      super(similarity);
      this.spans = spans;
      this.norms = norms;
      this.weight = weight;
      this.value = weight.getValue();
    }

    /** Advances to the next document that has at least one span. */
    public boolean next() throws IOException {
      if (firstTime) {
        more = spans.next();
        firstTime = false;
      }

      return collectCurrentDoc();
    }

    public int doc() { return doc; }

    public float score() throws IOException {
      float raw = getSimilarity().tf(freq) * value; // raw score
      return raw * Similarity.decodeNorm(norms[doc]); // normalize
    }

    /**
     * Advances to the first document at or after <code>target</code> that has
     * a span.  The frequency is accumulated for whichever document the spans
     * land on, not only when that document equals <code>target</code>.
     *
     * <p>After a previous {@link #next()} or skipTo the spans already sit on
     * the first span of a following document, so they are skipped again only
     * while they are still before <code>target</code>; otherwise the spans of
     * that following document would be lost.
     */
    public boolean skipTo(int target) throws IOException {
      if (firstTime) {
        more = spans.skipTo(target);
        firstTime = false;                          // next() must not re-init
      } else if (more && spans.doc() < target) {
        more = spans.skipTo(target);
      }

      return collectCurrentDoc();
    }

    /**
     * Consumes every span of the document the spans are positioned on, setting
     * <code>doc</code> and <code>freq</code>, and leaves the spans on the first
     * span of the following document (or exhausted).
     *
     * @return false iff the spans were already exhausted
     */
    private boolean collectCurrentDoc() throws IOException {
      if (!more) return false;

      freq = 0.0f;
      doc = spans.doc();

      while (more && doc == spans.doc()) {
        int matchLength = spans.end() - spans.start();
        freq += getSimilarity().sloppyFreq(matchLength);
        more = spans.next();
      }

      return more || freq != 0.0f;
    }

    /**
     * Explains the tf factor for <code>doc</code>; the frequency is reported
     * as zero when skipping did not end on that document.
     */
    public Explanation explain(final int doc) throws IOException {
      Explanation tfExplanation = new Explanation();

      skipTo(doc);

      float phraseFreq = (doc() == doc) ? freq : 0.0f;
      tfExplanation.setValue(getSimilarity().tf(phraseFreq));
      tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")");

      return tfExplanation;
    }

  }
  
  
  
  1.1                  jakarta-lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
  
  Index: SpanTermQuery.java
  ===================================================================
  package org.apache.lucene.search.spans;
  
  import java.io.IOException;
  
  import java.util.Collection;
  import java.util.ArrayList;
  
  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.index.Term;
  import org.apache.lucene.index.TermPositions;
  
  /**
   * Span query for a single term: every occurrence of the term is a span
   * covering exactly one position.
   */
  public class SpanTermQuery extends SpanQuery {
    private Term term;

    /** Builds a query whose spans are the occurrences of <code>term</code>. */
    public SpanTermQuery(Term term) { this.term = term; }

    /** Returns the term whose occurrences are matched. */
    public Term getTerm() { return term; }

    public String getField() { return term.field(); }

    /** Returns a new collection containing only this query's term. */
    public Collection getTerms() {
      Collection single = new ArrayList();
      single.add(term);
      return single;
    }

    /**
     * Renders just the term text when the term is in <code>field</code>, and
     * the full term otherwise.
     */
    public String toString(String field) {
      return term.field().equals(field) ? term.text() : term.toString();
    }

    /** Returns spans backed by the term's positions in <code>reader</code>. */
    public Spans getSpans(final IndexReader reader) throws IOException {
      return new Spans() {
          private TermPositions positions = reader.termPositions(term);

          private int doc;
          private int freq;                         // positions in current doc
          private int count;                        // positions consumed so far
          private int position;

          /** Records the document the positions are now on. */
          private void enterDocument() {
            doc = positions.doc();
            freq = positions.freq();
            count = 0;
          }

          /** Reads the next position of the current document. */
          private void readPosition() throws IOException {
            position = positions.nextPosition();
            count++;
          }

          public boolean next() throws IOException {
            boolean docConsumed = (count == freq);
            if (docConsumed) {                      // need a new document
              if (!positions.next()) {
                return false;
              }
              enterDocument();
            }
            readPosition();
            return true;
          }

          public boolean skipTo(int target) throws IOException {
            if (positions.skipTo(target)) {
              enterDocument();
              readPosition();
              return true;
            }
            return false;
          }

          public int doc() { return doc; }
          public int start() { return position; }
          public int end() { return position + 1; }

          public String toString() {
            return "spans(" + SpanTermQuery.this.toString() + ")";
          }

        };
    }

  }
  
  
  
  1.1                  jakarta-lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java
  
  Index: SpanWeight.java
  ===================================================================
  package org.apache.lucene.search.spans;
  
  import java.io.IOException;
  
  import java.util.Iterator;
  import java.util.Collection;
  
  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.index.Term;
  
  import org.apache.lucene.search.Query;
  import org.apache.lucene.search.Weight;
  import org.apache.lucene.search.Searcher;
  import org.apache.lucene.search.Scorer;
  import org.apache.lucene.search.Explanation;
  import org.apache.lucene.search.Similarity;
  
  /** The {@link Weight} used for span queries: derives an idf from the
   * query's terms, scales it by boost and query norm, and can explain how a
   * document's score is composed. */
  class SpanWeight implements Weight {
    private Searcher searcher;
    private float value;
    private float idf;
    private float queryNorm;
    private float queryWeight;

    private Collection terms;
    private SpanQuery query;

    /** Captures the query, its terms and the searcher used for statistics. */
    public SpanWeight(SpanQuery query, Searcher searcher) {
      this.searcher = searcher;
      this.query = query;
      this.terms = query.getTerms();
    }

    public Query getQuery() {
      return query;
    }

    public float getValue() {
      return value;
    }

    /** Computes idf and the un-normalized query weight (idf times boost) and
     * returns the square of that weight. */
    public float sumOfSquaredWeights() throws IOException {
      idf = searcher.getSimilarity().idf(terms, searcher);
      queryWeight = idf * query.getBoost();
      return queryWeight * queryWeight;
    }

    /** Applies <code>queryNorm</code> to the query weight and derives the
     * final value by multiplying in idf once more (the document side). */
    public void normalize(float queryNorm) {
      this.queryNorm = queryNorm;
      queryWeight *= queryNorm;
      value = queryWeight * idf;
    }

    public Scorer scorer(IndexReader reader) throws IOException {
      Spans spans = query.getSpans(reader);
      Similarity similarity = searcher.getSimilarity();
      byte[] norms = reader.norms(query.getField());
      return new SpanScorer(spans, this, similarity, norms);
    }

    /** Builds the space separated "text=docFreq" list shown in the idf
     * explanation. */
    private String describeDocFreqs() throws IOException {
      StringBuffer buffer = new StringBuffer();
      for (Iterator it = terms.iterator(); it.hasNext(); ) {
        Term term = (Term) it.next();
        buffer.append(term.text()).append("=").append(searcher.docFreq(term));
        if (it.hasNext()) {
          buffer.append(" ");
        }
      }
      return buffer.toString();
    }

    /** Explains the score of <code>doc</code> as the product of a query
     * weight part and a field weight part.  When the query weight part is
     * exactly 1.0 only the field weight explanation is returned. */
    public Explanation explain(IndexReader reader, int doc)
      throws IOException {

      Query self = getQuery();
      String field = ((SpanQuery) self).getField();

      Explanation result = new Explanation();
      result.setDescription("weight(" + self + " in " + doc + "), product of:");

      Explanation idfExpl =
        new Explanation(idf, "idf(" + field + ": " + describeDocFreqs() + ")");

      // query weight part: [boost *] idf * queryNorm
      Explanation queryExpl = new Explanation();
      queryExpl.setDescription("queryWeight(" + self + "), product of:");

      float boost = self.getBoost();
      Explanation boostExpl = new Explanation(boost, "boost");
      if (boost != 1.0f) {
        // a boost of 1.0 is left out of the details
        queryExpl.addDetail(boostExpl);
      }
      queryExpl.addDetail(idfExpl);

      Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");
      queryExpl.addDetail(queryNormExpl);

      float queryPart = boostExpl.getValue()
                        * idfExpl.getValue()
                        * queryNormExpl.getValue();
      queryExpl.setValue(queryPart);
      result.addDetail(queryExpl);

      // field weight part: tf * idf * fieldNorm
      Explanation fieldExpl = new Explanation();
      fieldExpl.setDescription("fieldWeight(" + field + ":"
                               + query.toString(field)
                               + " in " + doc + "), product of:");

      Explanation tfExpl = scorer(reader).explain(doc);
      fieldExpl.addDetail(tfExpl);
      fieldExpl.addDetail(idfExpl);

      byte[] fieldNorms = reader.norms(field);
      float fieldNorm = 0.0f;              // used when the field has no norms
      if (fieldNorms != null) {
        fieldNorm = Similarity.decodeNorm(fieldNorms[doc]);
      }
      Explanation fieldNormExpl = new Explanation(
        fieldNorm, "fieldNorm(field=" + field + ", doc=" + doc + ")");
      fieldExpl.addDetail(fieldNormExpl);

      float fieldPart = tfExpl.getValue()
                        * idfExpl.getValue()
                        * fieldNormExpl.getValue();
      fieldExpl.setValue(fieldPart);
      result.addDetail(fieldExpl);

      // combine both parts
      result.setValue(queryExpl.getValue() * fieldExpl.getValue());

      return (queryExpl.getValue() == 1.0f) ? fieldExpl : result;
    }
  }
  
  
  
  1.1                  jakarta-lucene/src/java/org/apache/lucene/search/spans/Spans.java
  
  Index: Spans.java
  ===================================================================
  package org.apache.lucene.search.spans;
  
  import java.io.IOException;
  
  /** Expert: an enumeration of span matches.  Used to implement span searching.
   * Each span represents a range of term positions within a document.  Matches
   * are enumerated in order, by increasing document number, within that by
   * increasing start position and finally by increasing end position.
   *
   * <p>The accessors {@link #doc()}, {@link #start()} and {@link #end()}
   * describe the current match and are initially invalid, i.e. before the
   * enumeration has been advanced with {@link #next()} or
   * {@link #skipTo(int)}. */
  public interface Spans {
    /** Move to the next match, returning true iff any such exists.
     *
     * @return true iff the enumeration is now positioned on a match
     * @throws IOException if an implementation fails while advancing
     */
    boolean next() throws IOException;

    /** Skips to the first match beyond the current whose document number is
     * greater than or equal to <i>target</i>. <p>Returns true iff there is such
     * a match.  <p>Behaves as if written: <pre>
     *   boolean skipTo(int target) {
     *     do {
     *       if (!next())
     *         return false;
     *     } while (target > doc());
     *     return true;
     *   }
     * </pre>
     * Note that, as in the sample above, the enumeration always advances by at
     * least one match, even when the current document number is already
     * greater than or equal to <i>target</i>.
     * Most implementations are considerably more efficient than that.
     *
     * @param target the smallest document number that is acceptable
     * @return true iff the enumeration is now positioned on a match
     * @throws IOException if an implementation fails while advancing
     */
    boolean skipTo(int target) throws IOException;

    /** Returns the document number of the current match.  Initially invalid. */
    int doc();

    /** Returns the start position of the current match.  Initially invalid. */
    int start();

    /** Returns the end position of the current match.  Initially invalid.
     *
     * <p>NOTE(review): SpanTermQuery reports <code>start() + 1</code> for a
     * single-term match, which suggests the end is exclusive (one past the
     * last position) -- confirm against the other implementations. */
    int end();

  }
  
  
  
  1.1                  jakarta-lucene/src/java/org/apache/lucene/search/spans/package.html
  
  Index: package.html
  ===================================================================
  <!doctype html public "-//w3c//dtd html 4.0 transitional//en">
  <html>
  <head></head>
  <body>
  The calculus of spans.
  </body>
  </html>
  
  
  
  1.2       +162 -11   jakarta-lucene/src/test/org/apache/lucene/search/TestBasics.java
  
  Index: TestBasics.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/test/org/apache/lucene/search/TestBasics.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- TestBasics.java	15 Jan 2004 22:42:34 -0000	1.1
  +++ TestBasics.java	30 Jan 2004 22:10:00 -0000	1.2
  @@ -55,6 +55,12 @@
    */
   
   import junit.framework.TestCase;
  +
  +import java.io.IOException;
  +
  +import java.util.Set;
  +import java.util.TreeSet;
  +
   import org.apache.lucene.util.English;
   import org.apache.lucene.analysis.SimpleAnalyzer;
   import org.apache.lucene.document.Document;
  @@ -63,9 +69,19 @@
   import org.apache.lucene.index.Term;
   import org.apache.lucene.store.RAMDirectory;
   
  +import org.apache.lucene.search.spans.*;
  +
   /**
    * Tests basic search capabilities.
    *
  + * <p>Uses a collection of 1000 documents, each the english rendition of their
  + * document number.  For example, the document numbered 333 has text "three
  + * hundred thirty three".
  + *
  + * <p>Tests are each a single query, and its hits are checked to ensure that
  + * all and only the correct documents are returned, thus providing end-to-end
  + * testing of the indexing and search code.
  + *
    * @author Doug Cutting
    */
   public class TestBasics extends TestCase {
  @@ -90,46 +106,181 @@
   
     public void testTerm() throws Exception {
       Query query = new TermQuery(new Term("field", "seventy"));
  -    Hits hits = searcher.search(query);
  -    assertEquals(100, hits.length());
  +    checkHits(query, new int[]
  +      {70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 170, 171, 172, 173, 174, 175,
  +       176, 177, 178, 179, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279,
  +       370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 470, 471, 472, 473,
  +       474, 475, 476, 477, 478, 479, 570, 571, 572, 573, 574, 575, 576, 577,
  +       578, 579, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 770, 771,
  +       772, 773, 774, 775, 776, 777, 778, 779, 870, 871, 872, 873, 874, 875,
  +       876, 877, 878, 879, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979});
     }
   
     public void testTerm2() throws Exception {
       Query query = new TermQuery(new Term("field", "seventish"));
  -    Hits hits = searcher.search(query);
  -    assertEquals(0, hits.length());
  +    checkHits(query, new int[] {});
     }
   
     public void testPhrase() throws Exception {
       PhraseQuery query = new PhraseQuery();
       query.add(new Term("field", "seventy"));
       query.add(new Term("field", "seven"));
  -    Hits hits = searcher.search(query);
  -    assertEquals(10, hits.length());
  +    checkHits(query, new int[]
  +      {77, 177, 277, 377, 477, 577, 677, 777, 877, 977});
     }
   
     public void testPhrase2() throws Exception {
       PhraseQuery query = new PhraseQuery();
       query.add(new Term("field", "seventish"));
       query.add(new Term("field", "sevenon"));
  -    Hits hits = searcher.search(query);
  -    assertEquals(0, hits.length());
  +    checkHits(query, new int[] {});
     }
   
     public void testBoolean() throws Exception {
       BooleanQuery query = new BooleanQuery();
       query.add(new TermQuery(new Term("field", "seventy")), true, false);
       query.add(new TermQuery(new Term("field", "seven")), true, false);
  -    Hits hits = searcher.search(query);
  -    assertEquals(19, hits.length());
  +    checkHits(query, new int[]
  +      {77, 777, 177, 277, 377, 477, 577, 677, 770, 771, 772, 773, 774, 775,
  +       776, 778, 779, 877, 977});
     }
   
     public void testBoolean2() throws Exception {
       BooleanQuery query = new BooleanQuery();
       query.add(new TermQuery(new Term("field", "sevento")), true, false);
       query.add(new TermQuery(new Term("field", "sevenly")), true, false);
  +    checkHits(query, new int[] {});
  +  }
  +
  +  public void testSpanNearExact() throws Exception {
  +    SpanTermQuery term1 = new SpanTermQuery(new Term("field", "seventy"));
  +    SpanTermQuery term2 = new SpanTermQuery(new Term("field", "seven"));
  +    SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {term1, term2},
  +                                            0, true);
  +    checkHits(query, new int[]
  +      {77, 177, 277, 377, 477, 577, 677, 777, 877, 977});
  +
  +    //System.out.println(searcher.explain(query, 77));
  +    //System.out.println(searcher.explain(query, 977));
  +  }
  +
  +  public void testSpanNearUnordered() throws Exception {
  +    SpanTermQuery term1 = new SpanTermQuery(new Term("field", "nine"));
  +    SpanTermQuery term2 = new SpanTermQuery(new Term("field", "six"));
  +    SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {term1, term2},
  +                                            4, false);
  +
  +    checkHits(query, new int[]
  +      {609, 629, 639, 649, 659, 669, 679, 689, 699,
  +       906, 926, 936, 946, 956, 966, 976, 986, 996});
  +  }
  +
  +  public void testSpanNearOrdered() throws Exception {
  +    SpanTermQuery term1 = new SpanTermQuery(new Term("field", "nine"));
  +    SpanTermQuery term2 = new SpanTermQuery(new Term("field", "six"));
  +    SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {term1, term2},
  +                                            4, true);
  +    checkHits(query, new int[]
  +      {906, 926, 936, 946, 956, 966, 976, 986, 996});
  +  }
  +
  +  public void testSpanNot() throws Exception {
  +    SpanTermQuery term1 = new SpanTermQuery(new Term("field", "eight"));
  +    SpanTermQuery term2 = new SpanTermQuery(new Term("field", "one"));
  +    SpanNearQuery near = new SpanNearQuery(new SpanQuery[] {term1, term2},
  +                                           4, true);
  +    SpanTermQuery term3 = new SpanTermQuery(new Term("field", "forty"));
  +    SpanNotQuery query = new SpanNotQuery(near, term3);
  +
  +    checkHits(query, new int[]
  +      {801, 821, 831, 851, 861, 871, 881, 891});
  +
  +    //System.out.println(searcher.explain(query, 801));
  +    //System.out.println(searcher.explain(query, 891));
  +  }
  +
  +  public void testSpanFirst() throws Exception {
  +    SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five"));
  +    SpanFirstQuery query = new SpanFirstQuery(term1, 1);
  +
  +    checkHits(query, new int[]
  +      {5, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513,
  +       514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527,
  +       528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541,
  +       542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555,
  +       556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569,
  +       570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583,
  +       584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597,
  +       598, 599});
  +
  +    //System.out.println(searcher.explain(query, 5));
  +    //System.out.println(searcher.explain(query, 599));
  +
  +  }
  +
  +  public void testSpanOr() throws Exception {
  +    SpanTermQuery term1 = new SpanTermQuery(new Term("field", "thirty"));
  +    SpanTermQuery term2 = new SpanTermQuery(new Term("field", "three"));
  +    SpanNearQuery near1 = new SpanNearQuery(new SpanQuery[] {term1, term2},
  +                                            0, true);
  +    SpanTermQuery term3 = new SpanTermQuery(new Term("field", "forty"));
  +    SpanTermQuery term4 = new SpanTermQuery(new Term("field", "seven"));
  +    SpanNearQuery near2 = new SpanNearQuery(new SpanQuery[] {term3, term4},
  +                                            0, true);
  +
  +    SpanOrQuery query = new SpanOrQuery(new SpanQuery[] {near1, near2});
  +
  +    checkHits(query, new int[]
  +      {33, 47, 133, 147, 233, 247, 333, 347, 433, 447, 533, 547, 633, 647, 733,
  +       747, 833, 847, 933, 947});
  +
  +    //System.out.println(searcher.explain(query, 33));
  +    //System.out.println(searcher.explain(query, 947));
  +  }
  +
  +  public void testSpanExactNested() throws Exception {
  +    SpanTermQuery term1 = new SpanTermQuery(new Term("field", "three"));
  +    SpanTermQuery term2 = new SpanTermQuery(new Term("field", "hundred"));
  +    SpanNearQuery near1 = new SpanNearQuery(new SpanQuery[] {term1, term2},
  +                                            0, true);
  +    SpanTermQuery term3 = new SpanTermQuery(new Term("field", "thirty"));
  +    SpanTermQuery term4 = new SpanTermQuery(new Term("field", "three"));
  +    SpanNearQuery near2 = new SpanNearQuery(new SpanQuery[] {term3, term4},
  +                                            0, true);
  +
  +    SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {near1, near2},
  +                                            0, true);
  +
  +    checkHits(query, new int[] {333});
  +
  +    //System.out.println(searcher.explain(query, 333));
  +  }
  +
  +  private void checkHits(Query query, int[] results) throws IOException {
  +    Hits hits = searcher.search(query);
  +
  +    Set correct = new TreeSet();
  +    for (int i = 0; i < results.length; i++) {
  +      correct.add(new Integer(results[i]));
  +    }
  +
  +    Set actual = new TreeSet();
  +    for (int i = 0; i < hits.length(); i++) {
  +      actual.add(new Integer(hits.id(i)));
  +    }
  +
  +    assertEquals(query.toString("field"), correct, actual);
  +  }
  +
  +  private void printHits(Query query) throws IOException {
       Hits hits = searcher.search(query);
  -    assertEquals(0, hits.length());
  +    System.out.print("new int[] {");
  +    for (int i = 0; i < hits.length(); i++) {
  +      System.out.print(hits.id(i));
  +      if (i != hits.length()-1)
  +        System.out.print(", ");
  +    }
  +    System.out.println("}");
     }
   
   }
  
  
  
  1.3       +3 -3      jakarta-lucene/src/test/org/apache/lucene/search/TestSimilarity.java
  
  Index: TestSimilarity.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/test/org/apache/lucene/search/TestSimilarity.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- TestSimilarity.java	29 Jan 2003 17:43:13 -0000	1.2
  +++ TestSimilarity.java	30 Jan 2004 22:10:00 -0000	1.3
  @@ -56,7 +56,7 @@
   
   import junit.framework.TestCase;
   
  -import java.util.Vector;
  +import java.util.Collection;
   
   import org.apache.lucene.index.Term;
   import org.apache.lucene.index.IndexWriter;
  @@ -81,7 +81,7 @@
       public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
       public float tf(float freq) { return freq; }
       public float sloppyFreq(int distance) { return 2.0f; }
  -    public float idf(Vector terms, Searcher searcher) { return 1.0f; }
  +    public float idf(Collection terms, Searcher searcher) { return 1.0f; }
       public float idf(int docFreq, int numDocs) { return 1.0f; }
       public float coord(int overlap, int maxOverlap) { return 1.0f; }
     }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message