lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gol...@apache.org
Subject cvs commit: jakarta-lucene/src/java/org/apache/lucene/index SegmentTermEnum.java SegmentTermDocs.java SegmentMerger.java TermInfosWriter.java
Date Tue, 20 Apr 2004 13:47:58 GMT
goller      2004/04/20 06:47:58

  Modified:    src/java/org/apache/lucene/index SegmentTermEnum.java
                        SegmentTermDocs.java SegmentMerger.java
                        TermInfosWriter.java
  Log:
  hopefully corrected or at least improved version of skipTo
  
  Revision  Changes    Path
  1.7       +26 -8     jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermEnum.java
  
  Index: SegmentTermEnum.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermEnum.java,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- SegmentTermEnum.java	19 Apr 2004 14:46:00 -0000	1.6
  +++ SegmentTermEnum.java	20 Apr 2004 13:47:58 -0000	1.7
  @@ -33,6 +33,7 @@
     long indexPointer = 0;
     int indexInterval;
     int skipInterval;
  +  private int formatM1SkipInterval;
     Term prev;
   
     private char[] buffer = {};
  @@ -51,7 +52,7 @@
   
         // back-compatible settings
         indexInterval = 128;
  -      skipInterval = Integer.MAX_VALUE;
  +      skipInterval = Integer.MAX_VALUE; // switch off skipTo optimization
   
       } else {
         // we have a format version number
  @@ -62,8 +63,17 @@
           throw new IOException("Unknown format version:" + format);
   
         size = input.readLong();                    // read the size
  -
  -      if (!isIndex) {
  +      
  +      if(format == -1){
  +        if (!isIndex) {
  +          indexInterval = input.readInt();
  +          formatM1SkipInterval = input.readInt();
  +        }
  +        // switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in
  +        // skipTo implementation of these versions
  +        skipInterval = Integer.MAX_VALUE;
  +      }
  +      else{
           indexInterval = input.readInt();
           skipInterval = input.readInt();
         }
  @@ -107,13 +117,21 @@
       termInfo.docFreq = input.readVInt();	  // read doc freq
       termInfo.freqPointer += input.readVLong();	  // read freq pointer
       termInfo.proxPointer += input.readVLong();	  // read prox pointer
  -
  -    if (!isIndex) {
  -      if (termInfo.docFreq > skipInterval) {
  -        termInfo.skipOffset = input.readVInt();
  +    
  +    if(format == -1){
  +    //  just read skipOffset in order to increment  file pointer; 
  +    // value is never used since skipTo is switched off
  +      if (!isIndex) {
  +        if (termInfo.docFreq > formatM1SkipInterval) {
  +          termInfo.skipOffset = input.readVInt(); 
  +        }
         }
       }
  -
  +    else{
  +      if (termInfo.docFreq >= skipInterval) 
  +        termInfo.skipOffset = input.readVInt();
  +    }
  +    
       if (isIndex)
         indexPointer += input.readVLong();	  // read index pointer
   
  
  
  
  1.12      +3 -1      jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermDocs.java
  
  Index: SegmentTermDocs.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermDocs.java,v
  retrieving revision 1.11
  retrieving revision 1.12
  diff -u -r1.11 -r1.12
  --- SegmentTermDocs.java	19 Apr 2004 19:32:20 -0000	1.11
  +++ SegmentTermDocs.java	20 Apr 2004 13:47:58 -0000	1.12
  @@ -84,6 +84,8 @@
   
     public void close() throws IOException {
       freqStream.close();
  +    if (skipStream != null)
  +      skipStream.close();
     }
   
     public final int doc() { return doc; }
  @@ -143,7 +145,7 @@
   
     /** Optimized implementation. */
     public boolean skipTo(int target) throws IOException {
  -    if (df > skipInterval) {                      // optimized case
  +    if (df >= skipInterval) {                      // optimized case
   
         if (skipStream == null)
           skipStream = (InputStream) freqStream.clone(); // lazily clone
  
  
  
  1.10      +3 -2      jakarta-lucene/src/java/org/apache/lucene/index/SegmentMerger.java
  
  Index: SegmentMerger.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentMerger.java,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- SegmentMerger.java	29 Mar 2004 22:48:02 -0000	1.9
  +++ SegmentMerger.java	20 Apr 2004 13:47:58 -0000	1.10
  @@ -234,6 +234,7 @@
     private OutputStream freqOutput = null;
     private OutputStream proxOutput = null;
     private TermInfosWriter termInfosWriter = null;
  +  private int skipInterval;
     private SegmentMergeQueue queue = null;
   
     private final void mergeTerms() throws IOException {
  @@ -242,6 +243,8 @@
         proxOutput = directory.createFile(segment + ".prx");
         termInfosWriter =
                 new TermInfosWriter(directory, segment, fieldInfos);
  +      skipInterval = termInfosWriter.skipInterval;
  +      queue = new SegmentMergeQueue(readers.size());
   
         mergeTermInfos();
   
  @@ -254,7 +257,6 @@
     }
   
     private final void mergeTermInfos() throws IOException {
  -    queue = new SegmentMergeQueue(readers.size());
       int base = 0;
       for (int i = 0; i < readers.size(); i++) {
         IndexReader reader = (IndexReader) readers.elementAt(i);
  @@ -327,7 +329,6 @@
      */
     private final int appendPostings(SegmentMergeInfo[] smis, int n)
             throws IOException {
  -    final int skipInterval = termInfosWriter.skipInterval;
       int lastDoc = 0;
       int df = 0;					  // number of docs w/ term
       resetSkip();
  
  
  
  1.6       +6 -10     jakarta-lucene/src/java/org/apache/lucene/index/TermInfosWriter.java
  
  Index: TermInfosWriter.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/TermInfosWriter.java,v
  retrieving revision 1.5
  retrieving revision 1.6
  diff -u -r1.5 -r1.6
  --- TermInfosWriter.java	25 Mar 2004 13:49:49 -0000	1.5
  +++ TermInfosWriter.java	20 Apr 2004 13:47:58 -0000	1.6
  @@ -27,13 +27,13 @@
   
   final class TermInfosWriter {
     /** The file format version, a negative number. */
  -  public static final int FORMAT = -1;
  +  public static final int FORMAT = -2;
   
     private FieldInfos fieldInfos;
     private OutputStream output;
     private Term lastTerm = new Term("", "");
     private TermInfo lastTi = new TermInfo();
  -  private int size = 0;
  +  private long size = 0;
   
     // TODO: the default values for these two parameters should be settable from
     // IndexWriter.  However, once that's done, folks will start setting them to
  @@ -80,10 +80,8 @@
       output = directory.createFile(segment + (isIndex ? ".tii" : ".tis"));
       output.writeInt(FORMAT);                      // write format
       output.writeLong(0);                          // leave space for size
  -    if (!isIndex) {
  -      output.writeInt(indexInterval);             // write indexInterval
  -      output.writeInt(skipInterval);              // write skipInterval
  -    }
  +    output.writeInt(indexInterval);             // write indexInterval
  +    output.writeInt(skipInterval);              // write skipInterval
     }
   
     /** Adds a new <Term, TermInfo> pair to the set.
  @@ -106,10 +104,8 @@
       output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
       output.writeVLong(ti.proxPointer - lastTi.proxPointer);
   
  -    if (!isIndex) {
  -      if (ti.docFreq > skipInterval) {
  -        output.writeVInt(ti.skipOffset);
  -      }
  +    if (ti.docFreq >= skipInterval) {
  +      output.writeVInt(ti.skipOffset);
       }
   
       if (isIndex) {
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message