lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject cvs commit: jakarta-lucene/src/java/org/apache/lucene/index IndexReader.java SegmentReader.java SegmentTermDocs.java SegmentTermPositions.java SegmentsReader.java TermDocs.java TermEnum.java
Date Mon, 21 Jan 2002 17:07:23 GMT
cutting     02/01/21 09:07:23

  Modified:    src/java/org/apache/lucene/index IndexReader.java
                        SegmentReader.java SegmentTermDocs.java
                        SegmentTermPositions.java SegmentsReader.java
                        TermDocs.java TermEnum.java
  Log:
  Substantially improved the performance of DateFilter by adding the
  ability to reuse TermDocs objects.
  
  Revision  Changes    Path
  1.6       +18 -4     jakarta-lucene/src/java/org/apache/lucene/index/IndexReader.java
  
  Index: IndexReader.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/IndexReader.java,v
  retrieving revision 1.5
  retrieving revision 1.6
  diff -u -r1.5 -r1.6
  --- IndexReader.java	26 Dec 2001 17:23:05 -0000	1.5
  +++ IndexReader.java	21 Jan 2002 17:07:23 -0000	1.6
  @@ -193,7 +193,7 @@
     abstract public int docFreq(Term t) throws IOException;
   
     /** Returns an enumeration of all the documents which contain
  -    <code>Term</code>. For each document, the document number, the frequency of
  +    <code>term</code>. For each document, the document number, the frequency of
       the term in that document is also provided, for use in search scoring.
       Thus, this method implements the mapping:
       <p><ul>
  @@ -201,10 +201,17 @@
       </ul>
       <p>The enumeration is ordered by document number.  Each document number
       is greater than all that precede it in the enumeration. */
  -  abstract public TermDocs termDocs(Term t) throws IOException;
  +  public TermDocs termDocs(Term term) throws IOException {
  +    TermDocs termDocs = termDocs();
  +    termDocs.seek(term);
  +    return termDocs;
  +  }
  +
  +  /** Returns an unpositioned {@link TermDocs} enumerator. */
  +  abstract public TermDocs termDocs() throws IOException;
   
     /** Returns an enumeration of all the documents which contain
  -    <code>Term</code>.  For each document, in addition to the document number
  +    <code>term</code>.  For each document, in addition to the document number
       and frequency of the term in that document, a list of all of the ordinal
       positions of the term in the document is available.  Thus, this method
       implements the mapping:
  @@ -218,7 +225,14 @@
       <p> This positional information faciliates phrase and proximity searching.
       <p>The enumeration is ordered by document number.  Each document number is
       greater than all that precede it in the enumeration. */
  -  abstract public TermPositions termPositions(Term t) throws IOException;
  +  public TermPositions termPositions(Term term) throws IOException {
  +    TermPositions termPositions = termPositions();
  +    termPositions.seek(term);
  +    return termPositions;
  +  }
  +
  +  /** Returns an unpositioned {@link TermPositions} enumerator. */
  +  abstract public TermPositions termPositions() throws IOException;
   
     /** Deletes the document numbered <code>docNum</code>.  Once a document is
       deleted it will not appear in TermDocs or TermPostitions enumerations.
  
  
  
  1.3       +6 -22     jakarta-lucene/src/java/org/apache/lucene/index/SegmentReader.java
  
  Index: SegmentReader.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentReader.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- SegmentReader.java	27 Sep 2001 16:27:01 -0000	1.2
  +++ SegmentReader.java	21 Jan 2002 17:07:23 -0000	1.3
  @@ -78,8 +78,8 @@
     BitVector deletedDocs = null;
     private boolean deletedDocsDirty = false;
   
  -  private InputStream freqStream;
  -  private InputStream proxStream;
  +  InputStream freqStream;
  +  InputStream proxStream;
   
   
     private static class Norm {
  @@ -194,28 +194,12 @@
       return (deletedDocs != null && deletedDocs.get(n));
     }
   
  -  public final TermDocs termDocs(Term t) throws IOException {
  -    TermInfo ti = tis.get(t);
  -    if (ti != null)
  -      return new SegmentTermDocs(this, ti);
  -    else
  -      return null;
  +  public final TermDocs termDocs() throws IOException {
  +    return new SegmentTermDocs(this);
     }
   
  -  final InputStream getFreqStream () {
  -    return (InputStream)freqStream.clone();
  -  }
  -
  -  public final TermPositions termPositions(Term t) throws IOException {
  -    TermInfo ti = tis.get(t);
  -    if (ti != null)
  -      return new SegmentTermPositions(this, ti);
  -    else
  -      return null;
  -  }
  -
  -  final InputStream getProxStream () {
  -    return (InputStream)proxStream.clone();
  +  public final TermPositions termPositions() throws IOException {
  +    return new SegmentTermPositions(this);
     }
   
     public final int docFreq(Term t) throws IOException {
  
  
  
  1.2       +15 -10    jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermDocs.java
  
  Index: SegmentTermDocs.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermDocs.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- SegmentTermDocs.java	18 Sep 2001 16:29:54 -0000	1.1
  +++ SegmentTermDocs.java	21 Jan 2002 17:07:23 -0000	1.2
  @@ -66,21 +66,26 @@
     int doc = 0;
     int freq;
   
  -  SegmentTermDocs(SegmentReader p) throws IOException {
  -    parent = p;
  -    freqStream = parent.getFreqStream();
  -    deletedDocs = parent.deletedDocs;
  +  SegmentTermDocs(SegmentReader parent)
  +    throws IOException {
  +    this.parent = parent;
  +    this.freqStream = (InputStream)parent.freqStream.clone();
  +    this.deletedDocs = parent.deletedDocs;
     }
  -
  -  SegmentTermDocs(SegmentReader p, TermInfo ti) throws IOException {
  -    this(p);
  +  
  +  public void seek(Term term) throws IOException {
  +    TermInfo ti = parent.tis.get(term);
       seek(ti);
     }
     
     void seek(TermInfo ti) throws IOException {
  -    freqCount = ti.docFreq;
  -    doc = 0;
  -    freqStream.seek(ti.freqPointer);
  +    if (ti == null) {
  +      freqCount = 0;
  +    } else {
  +      freqCount = ti.docFreq;
  +      doc = 0;
  +      freqStream.seek(ti.freqPointer);
  +    }
     }
     
     public void close() throws IOException {
  
  
  
  1.2       +1 -7      jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermPositions.java
  
  Index: SegmentTermPositions.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermPositions.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- SegmentTermPositions.java	18 Sep 2001 16:29:54 -0000	1.1
  +++ SegmentTermPositions.java	21 Jan 2002 17:07:23 -0000	1.2
  @@ -66,13 +66,7 @@
     
     SegmentTermPositions(SegmentReader p) throws IOException {
       super(p);
  -    proxStream = parent.getProxStream();
  -  }
  -
  -  SegmentTermPositions(SegmentReader p, TermInfo ti)
  -       throws IOException {
  -    this(p);
  -    seek(ti);
  +    this.proxStream = (InputStream)parent.proxStream.clone();
     }
   
     final void seek(TermInfo ti) throws IOException {
  
  
  
  1.3       +38 -20    jakarta-lucene/src/java/org/apache/lucene/index/SegmentsReader.java
  
  Index: SegmentsReader.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentsReader.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- SegmentsReader.java	11 Oct 2001 22:44:23 -0000	1.2
  +++ SegmentsReader.java	21 Jan 2002 17:07:23 -0000	1.3
  @@ -151,12 +151,12 @@
       return total;
     }
   
  -  public final TermDocs termDocs(Term term) throws IOException {
  -    return new SegmentsTermDocs(readers, starts, term);
  +  public final TermDocs termDocs() throws IOException {
  +    return new SegmentsTermDocs(readers, starts);
     }
   
  -  public final TermPositions termPositions(Term term) throws IOException {
  -    return new SegmentsTermPositions(readers, starts, term);
  +  public final TermPositions termPositions() throws IOException {
  +    return new SegmentsTermPositions(readers, starts);
     }
   
     public final void close() throws IOException {
  @@ -240,14 +240,16 @@
     protected int base = 0;
     protected int pointer = 0;
   
  -  SegmentsTermDocs(SegmentReader[] r, int[] s, Term t) {
  +  private SegmentTermDocs[] segTermDocs;
  +  protected SegmentTermDocs current;              // == segTermDocs[pointer]
  +  
  +  SegmentsTermDocs(SegmentReader[] r, int[] s) {
       readers = r;
       starts = s;
  -    term = t;
  +
  +    segTermDocs = new SegmentTermDocs[r.length];
     }
   
  -  protected SegmentTermDocs current;
  -  
     public final int doc() {
       return base + current.doc;
     }
  @@ -255,14 +257,19 @@
       return current.freq;
     }
   
  +  public final void seek(Term term) {
  +    this.term = term;
  +    this.base = 0;
  +    this.pointer = 0;
  +    this.current = null;
  +  }
  +
     public final boolean next() throws IOException {
       if (current != null && current.next()) {
         return true;
       } else if (pointer < readers.length) {
  -      if (current != null)
  -	current.close();
         base = starts[pointer];
  -      current = termDocs(readers[pointer++]);
  +      current = termDocs(pointer++);
         return next();
       } else
         return false;
  @@ -275,14 +282,13 @@
         while (current == null) {
   	if (pointer < readers.length) {		  // try next segment
   	  base = starts[pointer];
  -	  current = termDocs(readers[pointer++]);
  +	  current = termDocs(pointer++);
   	} else {
   	  return 0;
   	}
         }
         int end = current.read(docs, freqs);
         if (end == 0) {				  // none left in segment
  -	current.close();
   	current = null;
         } else {					  // got some
   	final int b = base;			  // adjust doc numbers
  @@ -302,25 +308,37 @@
       return true;
     }
   
  +  private SegmentTermDocs termDocs(int i) throws IOException {
  +    if (term == null)
  +      return null;
  +    SegmentTermDocs result = segTermDocs[i];
  +    if (result == null)
  +      result = segTermDocs[i] = termDocs(readers[i]);
  +    result.seek(term);
  +    return result;
  +  }
  +
     protected SegmentTermDocs termDocs(SegmentReader reader)
  -       throws IOException {
  -    return (SegmentTermDocs)reader.termDocs(term);
  +    throws IOException {
  +    return (SegmentTermDocs)reader.termDocs();
     }
   
     public final void close() throws IOException {
  -    if (current != null)
  -      current.close();
  +    for (int i = 0; i < segTermDocs.length; i++) {
  +      if (segTermDocs[i] != null)
  +        segTermDocs[i].close();
  +    }
     }
   }
   
   class SegmentsTermPositions extends SegmentsTermDocs implements TermPositions {
  -  SegmentsTermPositions(SegmentReader[] r, int[] s, Term t) {
  -    super(r,s,t);
  +  SegmentsTermPositions(SegmentReader[] r, int[] s) {
  +    super(r,s);
     }
   
     protected final SegmentTermDocs termDocs(SegmentReader reader)
          throws IOException {
  -    return (SegmentTermDocs)reader.termPositions(term);
  +    return (SegmentTermDocs)reader.termPositions();
     }
   
     public final int nextPosition() throws IOException {
  
  
  
  1.3       +6 -1      jakarta-lucene/src/java/org/apache/lucene/index/TermDocs.java
  
  Index: TermDocs.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/TermDocs.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- TermDocs.java	25 Dec 2001 19:27:04 -0000	1.2
  +++ TermDocs.java	21 Jan 2002 17:07:23 -0000	1.3
  @@ -67,6 +67,11 @@
     */
   
   public interface TermDocs {
  +  /** Sets this to the data for a term.
  +   * The enumeration is reset to the start of the data for this term.
  +   */
  +  void seek(Term term) throws IOException;
  +
     /** Returns the current document number.  <p> This is invalid until {@link
         #next()} is called for the first time.*/
     int doc();
  @@ -91,7 +96,7 @@
     /** Skips entries to the first beyond the current whose document number is
     * greater than or equal to <i>target</i>. <p>Returns true iff there is such
      * an entry.  <p>Behaves as if written: <pre>
  -   *   public boolean skipTo(int target) {
  +   *   boolean skipTo(int target) {
      *     do {
      *       if (!next())
      * 	     return false;
  
  
  
  1.2       +2 -4      jakarta-lucene/src/java/org/apache/lucene/index/TermEnum.java
  
  Index: TermEnum.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/TermEnum.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- TermEnum.java	18 Sep 2001 16:29:55 -0000	1.1
  +++ TermEnum.java	21 Jan 2002 17:07:23 -0000	1.2
  @@ -65,12 +65,10 @@
     /** Increments the enumeration to the next element.  True if one exists.*/
     abstract public boolean next() throws IOException;
   
  -  /** Returns the current Term in the enumeration.
  -    Initially invalid, valid after next() called for the first time.*/
  +  /** Returns the current Term in the enumeration.*/
     abstract public Term term();
   
  -  /** Returns the docFreq of the current Term in the enumeration.
  -    Initially invalid, valid after next() called for the first time.*/
  +  /** Returns the docFreq of the current Term in the enumeration.*/
     abstract public int docFreq();
   
     /** Closes the enumeration to further activity, freeing resources. */
  
  
  

--
To unsubscribe, e-mail:   <mailto:lucene-dev-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:lucene-dev-help@jakarta.apache.org>


Mime
View raw message