lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject cvs commit: jakarta-lucene/src/test/org/apache/lucene/search TestBasics.java
Date Mon, 09 Feb 2004 22:03:42 GMT
cutting     2004/02/09 14:03:42

  Modified:    src/java/org/apache/lucene/search/spans NearSpans.java
                        SpanOrQuery.java SpanTermQuery.java
               src/test/org/apache/lucene/search TestBasics.java
  Removed:     src/java/org/apache/lucene/search/spans SpanQueue.java
  Log:
  Fixed a few bugs in span searching.
  
  Revision  Changes    Path
  1.3       +49 -21    jakarta-lucene/src/java/org/apache/lucene/search/spans/NearSpans.java
  
  Index: NearSpans.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/spans/NearSpans.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- NearSpans.java	2 Feb 2004 13:27:52 -0000	1.2
  +++ NearSpans.java	9 Feb 2004 22:03:42 -0000	1.3
  @@ -23,6 +23,7 @@
   import java.util.Iterator;
   
   import org.apache.lucene.index.IndexReader;
  +import org.apache.lucene.util.PriorityQueue;
   
   class NearSpans implements Spans {
     private SpanNearQuery query;
  @@ -36,22 +37,48 @@
   
     private int totalLength;                        // sum of current lengths
   
  -  private SpanQueue queue;                        // sorted queue of spans
  +  private CellQueue queue;                        // sorted queue of spans
     private SpansCell max;                          // max element in queue
   
     private boolean more = true;                    // true iff not done
     private boolean firstTime = true;               // true before first next()
   
  -  private boolean queueStale = false;             // true if queue not sorted
  -  private boolean listStale = true;               // true if list not sorted
  +  private class CellQueue extends PriorityQueue {
  +    public CellQueue(int size) {
  +      initialize(size);
  +    }
  +    
  +    protected final boolean lessThan(Object o1, Object o2) {
  +      SpansCell spans1 = (SpansCell)o1;
  +      SpansCell spans2 = (SpansCell)o2;
  +      if (spans1.doc() == spans2.doc()) {
  +        if (spans1.start() == spans2.start()) {
  +          if (spans1.end() == spans2.end()) {
  +            return spans1.index > spans2.index;
  +          } else {
  +            return spans1.end() < spans2.end();
  +          }
  +        } else {
  +          return spans1.start() < spans2.start();
  +        }
  +      } else {
  +        return spans1.doc() < spans2.doc();
  +      }
  +    }
  +  }
  +
   
     /** Wraps a Spans, and can be used to form a linked list. */
     private class SpansCell implements Spans {
       private Spans spans;
       private SpansCell next;
       private int length = -1;
  +    private int index;
   
  -    public SpansCell(Spans spans) { this.spans = spans; }
  +    public SpansCell(Spans spans, int index) {
  +      this.spans = spans;
  +      this.index = index;
  +    }
   
       public boolean next() throws IOException {
         if (length != -1)                           // subtract old length
  @@ -93,7 +120,7 @@
       public int start() { return spans.start(); }
       public int end() { return spans.end(); }
   
  -    public String toString() { return spans.toString(); }
  +    public String toString() { return spans.toString() + "#" + index; }
     }
   
     public NearSpans(SpanNearQuery query, IndexReader reader)
  @@ -103,10 +130,10 @@
       this.inOrder = query.isInOrder();
   
       SpanQuery[] clauses = query.getClauses();     // initialize spans & list
  -    queue = new SpanQueue(clauses.length);
  +    queue = new CellQueue(clauses.length);
       for (int i = 0; i < clauses.length; i++) {
         SpansCell cell =                            // construct clause spans
  -        new SpansCell(clauses[i].getSpans(reader));
  +        new SpansCell(clauses[i].getSpans(reader), i);
         ordered.add(cell);                          // add to ordered
       }
     }
  @@ -114,18 +141,21 @@
     public boolean next() throws IOException {
       if (firstTime) {
         initList(true);
  -      listToQueue();                            // initialize queue
  +      listToQueue();                              // initialize queue
         firstTime = false;
  -    } else {
  -      more = last.next();                         // trigger scan
  -      queueStale = true;
  +    } else if (more) {
  +      more = min().next();                        // trigger further scanning
  +      if (more)
  +        queue.adjustTop();                        // maintain queue
       }
   
       while (more) {
   
  -      if (listStale) {                            // maintain list
  +      boolean queueStale = false;
  +
  +      if (min().doc() != max.doc()) {             // maintain list
           queueToList();
  -        listStale = false;
  +        queueStale = true;
         }
   
         // skip to doc w/ all clauses
  @@ -152,13 +182,8 @@
         }
   
         more = min().next();                        // trigger further scanning
  -
  -      if (more) {
  +      if (more)
           queue.adjustTop();                        // maintain queue
  -        if (min().doc() != max.doc()) {
  -          listStale = true;                       // maintain list
  -        }
  -      }
       }
       return false;                                 // no more matches
     }
  @@ -175,7 +200,6 @@
   
       if (more) {
         listToQueue();
  -      listStale = true;
   
         if (min().doc() == max.doc()) {             // at a match?
           int matchLength = max.end() - min().start();
  @@ -183,6 +207,7 @@
             return true;
           }
         }
  +
         return next();                              // no, scan
       }
   
  @@ -195,7 +220,10 @@
     public int start() { return min().start(); }
     public int end() { return max.end(); }
   
  -  public String toString() { return "spans(" + query.toString() + ")"; }
  +  public String toString() {
  +    return "spans("+query.toString()+")@"+
  +      (firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END"));
  +  }
   
     private void initList(boolean next) throws IOException {
       for (int i = 0; more && i < ordered.size(); i++) {
  
  
  
  1.3       +48 -9     jakarta-lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
  
  Index: SpanOrQuery.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- SpanOrQuery.java	2 Feb 2004 13:27:52 -0000	1.2
  +++ SpanOrQuery.java	9 Feb 2004 22:03:42 -0000	1.3
  @@ -24,6 +24,7 @@
   import java.util.Iterator;
   
   import org.apache.lucene.index.IndexReader;
  +import org.apache.lucene.util.PriorityQueue;
   
   /** Matches the union of its clauses.*/
   public class SpanOrQuery extends SpanQuery {
  @@ -78,6 +79,27 @@
       return buffer.toString();
     }
   
  +  private class SpanQueue extends PriorityQueue {
  +    public SpanQueue(int size) {
  +      initialize(size);
  +    }
  +
  +    protected final boolean lessThan(Object o1, Object o2) {
  +      Spans spans1 = (Spans)o1;
  +      Spans spans2 = (Spans)o2;
  +      if (spans1.doc() == spans2.doc()) {
  +        if (spans1.start() == spans2.start()) {
  +          return spans1.end() < spans2.end();
  +        } else {
  +          return spans1.start() < spans2.start();
  +        }
  +      } else {
  +        return spans1.doc() < spans2.doc();
  +      }
  +    }
  +  }
  +
  +
     public Spans getSpans(final IndexReader reader) throws IOException {
       if (clauses.size() == 1)                      // optimize 1-clause case
         return ((SpanQuery)clauses.get(0)).getSpans(reader);
  @@ -101,6 +123,8 @@
                 Spans spans = (Spans)all.get(i);
                 if (spans.next()) {                 // move to first entry
                   queue.put(spans);                 // build queue
  +              } else {
  +                all.remove(i--);
                 }
               }
               firstTime = false;
  @@ -111,26 +135,39 @@
               return false;
             }
   
  -          if (top().next()) {                       // move to next
  +          if (top().next()) {                     // move to next
               queue.adjustTop();
               return true;
             }
   
  -          queue.pop();                            // exhausted a clause
  +          all.remove(queue.pop());                // exhausted a clause
  +
             return queue.size() != 0;
           }
   
           private Spans top() { return (Spans)queue.top(); }
   
           public boolean skipTo(int target) throws IOException {
  -          queue.clear();                          // clear the queue
  -          for (int i = 0; i < all.size(); i++) {
  -            Spans spans = (Spans)all.get(i);
  -            if (spans.skipTo(target)) {           // skip each spans in all
  -              queue.put(spans);                   // rebuild queue
  +          if (firstTime) {
  +            for (int i = 0; i < all.size(); i++) {
  +              Spans spans = (Spans)all.get(i);
  +              if (spans.skipTo(target)) {         // skip each spans in all
  +                queue.put(spans);                 // build queue
  +              } else {
  +                all.remove(i--);
  +              }
  +            }
  +            firstTime = false;
  +          } else {
  +            while (queue.size() != 0 && top().doc() < target) {
  +              if (top().skipTo(target)) {
  +                queue.adjustTop();
  +              } else {
  +                all.remove(queue.pop());
  +              }
               }
             }
  -          firstTime = false;
  +
             return queue.size() != 0;
           }
   
  @@ -139,7 +176,9 @@
           public int end() { return top().end(); }
   
           public String toString() {
  -          return "spans(" + SpanOrQuery.this.toString() + ")";
  +          return "spans("+SpanOrQuery.this+")@"+
  +            (firstTime?"START"
  +             :(queue.size()>0?(doc()+":"+start()+"-"+end()):"END"));
           }
   
         };
  
  
  
  1.3       +9 -4      jakarta-lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
  
  Index: SpanTermQuery.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- SpanTermQuery.java	2 Feb 2004 13:27:52 -0000	1.2
  +++ SpanTermQuery.java	9 Feb 2004 22:03:42 -0000	1.3
  @@ -54,15 +54,17 @@
       return new Spans() {
           private TermPositions positions = reader.termPositions(term);
   
  -        private int doc;
  +        private int doc = -1;
           private int freq;
           private int count;
           private int position;
   
           public boolean next() throws IOException {
             if (count == freq) {
  -            if (!positions.next())
  +            if (!positions.next()) {
  +              doc = Integer.MAX_VALUE;
                 return false;
  +            }
               doc = positions.doc();
               freq = positions.freq();
               count = 0;
  @@ -73,8 +75,10 @@
           }
   
           public boolean skipTo(int target) throws IOException {
  -          if (!positions.skipTo(target))
  +          if (!positions.skipTo(target)) {
  +            doc = Integer.MAX_VALUE;
               return false;
  +          }
   
             doc = positions.doc();
             freq = positions.freq();
  @@ -91,7 +95,8 @@
           public int end() { return position + 1; }
   
           public String toString() {
  -          return "spans(" + SpanTermQuery.this.toString() + ")";
  +          return "spans(" + SpanTermQuery.this.toString() + ")@"+
  +            (doc==-1?"START":(doc==Integer.MAX_VALUE)?"END":doc+"-"+position);
           }
   
         };
  
  
  
  1.3       +24 -0     jakarta-lucene/src/test/org/apache/lucene/search/TestBasics.java
  
  Index: TestBasics.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/test/org/apache/lucene/search/TestBasics.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- TestBasics.java	30 Jan 2004 22:10:00 -0000	1.2
  +++ TestBasics.java	9 Feb 2004 22:03:42 -0000	1.3
  @@ -256,6 +256,30 @@
       //System.out.println(searcher.explain(query, 333));
     }
   
  +  public void testSpanNearOr() throws Exception {
  +
  +    SpanTermQuery t1 = new SpanTermQuery(new Term("field","six"));
  +    SpanTermQuery t3 = new SpanTermQuery(new Term("field","seven"));
  +    
  +    SpanTermQuery t5 = new SpanTermQuery(new Term("field","seven"));
  +    SpanTermQuery t6 = new SpanTermQuery(new Term("field","six"));
  +
  +    SpanOrQuery to1 = new SpanOrQuery(new SpanQuery[] {t1, t3});
  +    SpanOrQuery to2 = new SpanOrQuery(new SpanQuery[] {t5, t6});
  +    
  +    SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {to1, to2},
  +                                            10, true);
  +
  +    checkHits(query, new int[]
  +      {606, 607, 626, 627, 636, 637, 646, 647, 
  +       656, 657, 666, 667, 676, 677, 686, 687, 696, 697,
  +       706, 707, 726, 727, 736, 737, 746, 747, 
  +       756, 757, 766, 767, 776, 777, 786, 787, 796, 797});
  +  }
  +
  +
  +
  +
     private void checkHits(Query query, int[] results) throws IOException {
       Hits hits = searcher.search(query);
   
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message