lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject cvs commit: jakarta-lucene/src/test/org/apache/lucene/util English.java
Date Thu, 15 Jan 2004 22:42:35 GMT
cutting     2004/01/15 14:42:35

  Modified:    .        CHANGES.txt
               src/java/org/apache/lucene/index DocumentWriter.java
                        SegmentMerger.java SegmentTermDocs.java
                        SegmentTermEnum.java SegmentTermPositions.java
                        TermInfo.java TermInfosReader.java
                        TermInfosWriter.java
               src/java/org/apache/lucene/search BooleanQuery.java
                        BooleanScorer.java IndexSearcher.java
                        PhrasePositions.java PhraseScorer.java Scorer.java
                        TermScorer.java
               src/java/org/apache/lucene/store RAMDirectory.java
               src/test/org/apache/lucene ThreadSafetyTest.java
  Added:       src/java/org/apache/lucene/search ConjunctionScorer.java
               src/java/org/apache/lucene/store RAMFile.java
                        RAMInputStream.java RAMOutputStream.java
               src/test/org/apache/lucene/search TestBasics.java
               src/test/org/apache/lucene/util English.java
  Log:
  Optimized TermDocs.skipTo() and changed scorers to take advantage of it.
  
  Revision  Changes    Path
  1.66      +31 -1     jakarta-lucene/CHANGES.txt
  
  Index: CHANGES.txt
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/CHANGES.txt,v
  retrieving revision 1.65
  retrieving revision 1.66
  diff -u -r1.65 -r1.66
  --- CHANGES.txt	26 Dec 2003 18:05:26 -0000	1.65
  +++ CHANGES.txt	15 Jan 2004 22:42:32 -0000	1.66
  @@ -2,6 +2,36 @@
   
   $Id$
   
  +1.4 RC1
  +
  + 1. Changed the format of the .tis file, so that:
  +
  +    - it has a format version number, which makes it easier to
  +      back-compatibly change file formats in the future.
  +
  +    - the term count is now stored as a long.  This was the one aspect
  +      of Lucene's file formats which limited index size.
  +
  +    - a few internal index parameters are now stored in the index, so
  +      that they can (in theory) now be changed from index to index,
  +      although there is not yet an API to do so.
  +
  +    These changes are back compatible.  The new code can read old
  +    indexes.  But old code will not be able to read new indexes. (cutting)
  +
  + 2. Added an optimized implementation of TermDocs.skipTo().  A skip
  +    table is now stored for each term in the .frq file.  This only
  +    adds a percent or two to overall index size, but can substantially
  +    speed up many searches.  (cutting)
  +
  + 3. Restructured the Scorer API and all Scorer implementations to take
  +    advantage of an optimized TermDocs.skipTo() implementation.  In
  +    particular, PhraseQuerys and conjunctive BooleanQuerys are
  +    faster when one clause has substantially fewer matches than the
  +    others.  (A conjunctive BooleanQuery is a BooleanQuery where all
  +    clauses are required.)  (cutting)
  +
  +
   1.3 final
   
    1. Added catch of BooleanQuery$TooManyClauses in QueryParser to
  
  
  
  1.8       +1 -1      jakarta-lucene/src/java/org/apache/lucene/index/DocumentWriter.java
  
  Index: DocumentWriter.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/DocumentWriter.java,v
  retrieving revision 1.7
  retrieving revision 1.8
  diff -u -r1.7 -r1.8
  --- DocumentWriter.java	22 Dec 2003 21:40:18 -0000	1.7
  +++ DocumentWriter.java	15 Jan 2004 22:42:32 -0000	1.8
  @@ -291,7 +291,7 @@
           Posting posting = postings[i];
   
           // add an entry to the dictionary with pointers to prox and freq files
  -        ti.set(1, freq.getFilePointer(), prox.getFilePointer());
  +        ti.set(1, freq.getFilePointer(), prox.getFilePointer(), -1);
           tis.add(posting.term, ti);
   
           // add an entry to the freq file
  
  
  
  1.7       +45 -4     jakarta-lucene/src/java/org/apache/lucene/index/SegmentMerger.java
  
  Index: SegmentMerger.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentMerger.java,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- SegmentMerger.java	31 Oct 2003 09:28:44 -0000	1.6
  +++ SegmentMerger.java	15 Jan 2004 22:42:32 -0000	1.7
  @@ -62,6 +62,7 @@
   import org.apache.lucene.store.Directory;
   import org.apache.lucene.store.OutputStream;
   import org.apache.lucene.store.InputStream;
  +import org.apache.lucene.store.RAMOutputStream;
   import org.apache.lucene.util.BitVector;
   
   final class SegmentMerger {
  @@ -246,17 +247,21 @@
   
       int df = appendPostings(smis, n);		  // append posting data
   
  +    long skipPointer = writeSkip();
  +
       if (df > 0) {
         // add an entry to the dictionary with pointers to prox and freq files
  -      termInfo.set(df, freqPointer, proxPointer);
  +      termInfo.set(df, freqPointer, proxPointer, (int)(skipPointer-freqPointer));
         termInfosWriter.add(smis[0].term, termInfo);
       }
     }
   
     private final int appendPostings(SegmentMergeInfo[] smis, int n)
          throws IOException {
  +    final int skipInterval = termInfosWriter.skipInterval;
       int lastDoc = 0;
       int df = 0;					  // number of docs w/ term
  +    resetSkip();
       for (int i = 0; i < n; i++) {
         SegmentMergeInfo smi = smis[i];
         TermPositions postings = smi.postings;
  @@ -272,6 +277,12 @@
           if (doc < lastDoc)
             throw new IllegalStateException("docs out of order");
   
  +        df++;
  +
  +        if ((df % skipInterval) == 0) {
  +          bufferSkip(lastDoc);
  +        }
  +
           int docCode = (doc - lastDoc) << 1;	  // use low bit to flag freq=1
           lastDoc = doc;
           
  @@ -289,13 +300,43 @@
             proxOutput.writeVInt(position - lastPosition);
             lastPosition = position;
           }
  -
  -        df++;
         }
       }
       return df;
     }
  -  private final void mergeNorms() throws IOException {
  +
  +  private RAMOutputStream skipBuffer = new RAMOutputStream();
  +  private int lastSkipDoc;
  +  private long lastSkipFreqPointer;
  +  private long lastSkipProxPointer;
  +
  +  private void resetSkip() throws IOException {
  +    skipBuffer.reset();
  +    lastSkipDoc = 0;
  +    lastSkipFreqPointer = freqOutput.getFilePointer();
  +    lastSkipProxPointer = proxOutput.getFilePointer();
  +  }
  +
  +  private void bufferSkip(int doc) throws IOException {
  +    long freqPointer = freqOutput.getFilePointer();
  +    long proxPointer = proxOutput.getFilePointer();
  +
  +    skipBuffer.writeVInt(doc - lastSkipDoc); 
  +    skipBuffer.writeVInt((int)(freqPointer - lastSkipFreqPointer));
  +    skipBuffer.writeVInt((int)(proxPointer - lastSkipProxPointer));
  +
  +    lastSkipDoc = doc;
  +    lastSkipFreqPointer = freqPointer;
  +    lastSkipProxPointer = proxPointer;
  +  }
  +
  +  private long writeSkip() throws IOException {
  +    long skipPointer = freqOutput.getFilePointer();
  +    skipBuffer.writeTo(freqOutput);
  +    return skipPointer;
  +  }
  +
  +  private void mergeNorms() throws IOException {
       for (int i = 0; i < fieldInfos.size(); i++) {
         FieldInfo fi = fieldInfos.fieldInfo(i);
         if (fi.isIndexed) {
  
  
  
  1.4       +77 -10    jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermDocs.java
  
  Index: SegmentTermDocs.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermDocs.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- SegmentTermDocs.java	21 Oct 2003 17:59:16 -0000	1.3
  +++ SegmentTermDocs.java	15 Jan 2004 22:42:32 -0000	1.4
  @@ -61,16 +61,27 @@
   class SegmentTermDocs implements TermDocs {
     protected SegmentReader parent;
     private InputStream freqStream;
  -  private int freqCount;
  +  private int count;
  +  private int df;
     private BitVector deletedDocs;
     int doc = 0;
     int freq;
   
  +  private int skipInterval;
  +  private int skipCount;
  +  private InputStream skipStream;
  +  private int skipDoc;
  +  private long freqPointer;
  +  private long proxPointer;
  +  private long skipPointer;
  +  private boolean haveSkipped;
  +
     SegmentTermDocs(SegmentReader parent)
       throws IOException {
       this.parent = parent;
       this.freqStream = (InputStream)parent.freqStream.clone();
       this.deletedDocs = parent.deletedDocs;
  +    this.skipInterval = parent.tis.getSkipInterval();
     }
     
     public void seek(Term term) throws IOException {
  @@ -88,12 +99,19 @@
     }
     
     void seek(TermInfo ti) throws IOException {
  +    count = 0;
       if (ti == null) {
  -      freqCount = 0;
  +      df = 0;
       } else {
  -      freqCount = ti.docFreq;
  +      df = ti.docFreq;
         doc = 0;
  -      freqStream.seek(ti.freqPointer);
  +      skipDoc = 0;
  +      skipCount = 0;
  +      freqPointer = ti.freqPointer;
  +      proxPointer = ti.proxPointer;
  +      skipPointer = freqPointer + ti.skipOffset;
  +      freqStream.seek(freqPointer);
  +      haveSkipped = false;
       }
     }
     
  @@ -109,7 +127,7 @@
   
     public boolean next() throws IOException {
       while (true) {
  -      if (freqCount == 0)
  +      if (count == df)
   	return false;
   
         int docCode = freqStream.readVInt();
  @@ -119,7 +137,7 @@
         else
   	freq = freqStream.readVInt();		  // else read freq
    
  -      freqCount--;
  +      count++;
       
         if (deletedDocs == null || !deletedDocs.get(doc))
   	break;
  @@ -131,9 +149,9 @@
     /** Optimized implementation. */
     public int read(final int[] docs, final int[] freqs)
         throws IOException {
  -    final int end = docs.length;
  +    final int length = docs.length;
       int i = 0;
  -    while (i < end && freqCount > 0) {
  +    while (i < length && count < df) {
   
         // manually inlined call to next() for speed
         final int docCode = freqStream.readVInt();
  @@ -142,7 +160,7 @@
   	freq = 1;				  // freq is one
         else
   	freq = freqStream.readVInt();		  // else read freq
  -      freqCount--;
  +      count++;
      
         if (deletedDocs == null || !deletedDocs.get(doc)) {
   	docs[i] = doc;
  @@ -153,12 +171,61 @@
       return i;
     }
   
  -  /** As yet unoptimized implementation. */
  +  /** Overridden by SegmentTermPositions to skip in prox stream. */
  +  protected void skipProx(long proxPointer) throws IOException {}
  +
  +  /** Optimized implementation. */
     public boolean skipTo(int target) throws IOException {
  +    if (df > skipInterval) {                      // optimized case
  +
  +      if (skipStream == null)
  +        skipStream = (InputStream)freqStream.clone(); // lazily clone
  +
  +      if (!haveSkipped) {                          // lazily seek skip stream
  +        skipStream.seek(skipPointer);
  +        haveSkipped = true;
  +      }
  +
  +      // scan skip data
  +      int lastSkipDoc = skipDoc;
  +      long lastFreqPointer = freqStream.getFilePointer();
  +      long lastProxPointer = -1;
  +      int numSkipped = -1 -(count % skipInterval);
  +      
  +      while (target > skipDoc) {
  +        lastSkipDoc = skipDoc;
  +        lastFreqPointer = freqPointer;
  +        lastProxPointer = proxPointer;
  +        if (skipDoc >= doc)
  +          numSkipped += skipInterval;
  +        
  +        if ((count + numSkipped + skipInterval) > df)
  +          break;                                  // no more skips
  +
  +        skipDoc += skipStream.readVInt();
  +        freqPointer += skipStream.readVInt();
  +        proxPointer += skipStream.readVInt();
  +        
  +        skipCount++;
  +      }
  +      
  +      // if we found something to skip, then skip it
  +      if (lastFreqPointer > freqStream.getFilePointer()) {
  +        freqStream.seek(lastFreqPointer);
  +        skipProx(lastProxPointer);
  +        
  +        doc = lastSkipDoc;
  +        count += numSkipped;
  +      }
  +
  +    }
  +
  +    // done skipping, now just scan
       do {
         if (!next())
   	return false;
       } while (target > doc);
       return true;
     }
  +
   }
  
  
  
  1.3       +39 -4     jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermEnum.java
  
  Index: SegmentTermEnum.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermEnum.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- SegmentTermEnum.java	11 Oct 2001 15:14:14 -0000	1.2
  +++ SegmentTermEnum.java	15 Jan 2004 22:42:32 -0000	1.3
  @@ -60,14 +60,17 @@
   final class SegmentTermEnum extends TermEnum implements Cloneable {
     private InputStream input;
     private FieldInfos fieldInfos;
  -  int size;
  -  int position = -1;
  +  long size;
  +  long position = -1;
   
     private Term term = new Term("", "");
     private TermInfo termInfo = new TermInfo();
   
  -  boolean isIndex = false;
  +  private int format;
  +  private boolean isIndex = false;
     long indexPointer = 0;
  +  int indexInterval;
  +  int skipInterval;
     Term prev;
   
     private char[] buffer = {};
  @@ -76,8 +79,34 @@
          throws IOException {
       input = i;
       fieldInfos = fis; 
  -    size = input.readInt();
       isIndex = isi;
  +
  +    int firstInt = input.readInt();
  +    if (firstInt >= 0) {
  +      // original-format file, without explicit format version number
  +      format = 0;
  +      size = firstInt;
  +
  +      // back-compatible settings
  +      indexInterval = 128;
  +      skipInterval = Integer.MAX_VALUE;
  +
  +    } else {
  +      // we have a format version number
  +      format = firstInt;
  +
  +      // check that it is a format we can understand
  +      if (format < TermInfosWriter.FORMAT)
  +        throw new IOException("Unknown format version:" + format);
  +      
  +      size = input.readLong();                    // read the size
  +      
  +      if (!isIndex) {
  +        indexInterval = input.readInt();
  +        skipInterval = input.readInt();
  +      }
  +    }
  +    
     }
     
     protected Object clone() {
  @@ -117,6 +146,12 @@
       termInfo.freqPointer += input.readVLong();	  // read freq pointer
       termInfo.proxPointer += input.readVLong();	  // read prox pointer
       
  +    if (!isIndex) {
  +      if (termInfo.docFreq > skipInterval) {
  +        termInfo.skipOffset = input.readVInt();
  +      }
  +    }
  +
       if (isIndex)
         indexPointer += input.readVLong();	  // read index pointer
   
  
  
  
  1.5       +7 -0      jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermPositions.java
  
  Index: SegmentTermPositions.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermPositions.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- SegmentTermPositions.java	21 Oct 2003 17:59:16 -0000	1.4
  +++ SegmentTermPositions.java	15 Jan 2004 22:42:32 -0000	1.5
  @@ -109,4 +109,11 @@
       throw new UnsupportedOperationException();
     }
   
  +
  +  /** Called by super.skipTo(). */
  +  protected void skipProx(long proxPointer) throws IOException {
  +    proxStream.seek(proxPointer);
  +    proxCount = 0;
  +  }
  +
   }
  
  
  
  1.2       +9 -4      jakarta-lucene/src/java/org/apache/lucene/index/TermInfo.java
  
  Index: TermInfo.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/TermInfo.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- TermInfo.java	18 Sep 2001 16:29:55 -0000	1.1
  +++ TermInfo.java	15 Jan 2004 22:42:32 -0000	1.2
  @@ -62,6 +62,7 @@
   
     long freqPointer = 0;
     long proxPointer = 0;
  +  int skipOffset;
   
     TermInfo() {}
   
  @@ -75,17 +76,21 @@
       docFreq = ti.docFreq;
       freqPointer = ti.freqPointer;
       proxPointer = ti.proxPointer;
  +    skipOffset = ti.skipOffset;
     }
   
  -  final void set(int df, long fp, long pp) {
  -    docFreq = df;
  -    freqPointer = fp;
  -    proxPointer = pp;
  +  final void set(int docFreq,
  +                 long freqPointer, long proxPointer, int skipOffset) {
  +    this.docFreq = docFreq;
  +    this.freqPointer = freqPointer;
  +    this.proxPointer = proxPointer;
  +    this.skipOffset = skipOffset;
     }
   
     final void set(TermInfo ti) {
       docFreq = ti.docFreq;
       freqPointer = ti.freqPointer;
       proxPointer = ti.proxPointer;
  +    skipOffset = ti.skipOffset;
     }
   }
  
  
  
  1.4       +13 -9     jakarta-lucene/src/java/org/apache/lucene/index/TermInfosReader.java
  
  Index: TermInfosReader.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/TermInfosReader.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- TermInfosReader.java	12 Aug 2003 09:17:53 -0000	1.3
  +++ TermInfosReader.java	15 Jan 2004 22:42:32 -0000	1.4
  @@ -68,7 +68,7 @@
     private FieldInfos fieldInfos;
   
     private SegmentTermEnum enumerator;
  -  private int size;
  +  private long size;
   
     TermInfosReader(Directory dir, String seg, FieldInfos fis)
          throws IOException {
  @@ -82,13 +82,17 @@
       readIndex();
     }
   
  +  public int getSkipInterval() {
  +    return enumerator.skipInterval;
  +  }
  +
     final void close() throws IOException {
       if (enumerator != null)
         enumerator.close();
     }
   
     /** Returns the number of term/value pairs in the set. */
  -  final int size() {
  +  final long size() {
       return size;
     }
   
  @@ -101,7 +105,7 @@
         new SegmentTermEnum(directory.openFile(segment + ".tii"),
   			  fieldInfos, true);
       try {
  -      int indexSize = indexEnum.size;
  +      int indexSize = (int)indexEnum.size;
   
         indexTerms = new Term[indexSize];
         indexInfos = new TermInfo[indexSize];
  @@ -137,7 +141,7 @@
   
     private final void seekEnum(int indexOffset) throws IOException {
       enumerator.seek(indexPointers[indexOffset],
  -	      (indexOffset * TermInfosWriter.INDEX_INTERVAL) - 1,
  +	      (indexOffset * enumerator.indexInterval) - 1,
   	      indexTerms[indexOffset], indexInfos[indexOffset]);
     }
   
  @@ -146,10 +150,10 @@
       if (size == 0) return null;
   
       // optimize sequential access: first try scanning cached enumerator w/o seeking
  -    if (enumerator.term() != null			  // term is at or past current
  +    if (enumerator.term() != null                 // term is at or past current
   	&& ((enumerator.prev != null && term.compareTo(enumerator.prev) > 0)
   	    || term.compareTo(enumerator.term()) >= 0)) {
  -      int enumOffset = (enumerator.position/TermInfosWriter.INDEX_INTERVAL)+1;
  +      int enumOffset = (int)(enumerator.position/enumerator.indexInterval)+1;
         if (indexTerms.length == enumOffset	  // but before end of block
   	  || term.compareTo(indexTerms[enumOffset]) < 0)
   	return scanEnum(term);			  // no need to seek
  @@ -174,10 +178,10 @@
       if (size == 0) return null;
   
       if (enumerator != null && enumerator.term() != null && position >= enumerator.position &&
  -	position < (enumerator.position + TermInfosWriter.INDEX_INTERVAL))
  +	position < (enumerator.position + enumerator.indexInterval))
         return scanEnum(position);		  // can avoid seek
   
  -    seekEnum(position / TermInfosWriter.INDEX_INTERVAL); // must seek
  +    seekEnum(position / enumerator.indexInterval); // must seek
       return scanEnum(position);
     }
   
  @@ -190,7 +194,7 @@
     }
   
     /** Returns the position of a Term in the set or -1. */
  -  final synchronized int getPosition(Term term) throws IOException {
  +  final synchronized long getPosition(Term term) throws IOException {
       if (size == 0) return -1;
   
       int indexOffset = getIndexOffset(term);
  
  
  
  1.3       +39 -5     jakarta-lucene/src/java/org/apache/lucene/index/TermInfosWriter.java
  
  Index: TermInfosWriter.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/TermInfosWriter.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- TermInfosWriter.java	7 Nov 2002 05:55:39 -0000	1.2
  +++ TermInfosWriter.java	15 Jan 2004 22:42:32 -0000	1.3
  @@ -62,13 +62,36 @@
     Directory.  A TermInfos can be written once, in order.  */
   
   final class TermInfosWriter {
  +  /** The file format version, a negative number. */
  +  public static final int FORMAT = -1;
  +
     private FieldInfos fieldInfos;
     private OutputStream output;
     private Term lastTerm = new Term("", "");
     private TermInfo lastTi = new TermInfo();
     private int size = 0;
   
  -  static final int INDEX_INTERVAL = 128;
  +  // TODO: the default values for these two parameters should be settable from
  +  // IndexWriter.  However, once that's done, folks will start setting them to
  +  // ridiculous values and complaining that things don't work well, as with
  +  // mergeFactor.  So, let's wait until a number of folks find that alternate
  +  // values work better.  Note that both of these values are stored in the
  +  // segment, so that it's safe to change these w/o rebuilding all indexes.
  +
  +  /** Expert: The fraction of terms in the "dictionary" which should be stored
  +   * in RAM.  Smaller values use more memory, but make searching slightly
  +   * faster, while larger values use less memory and make searching slightly
  +   * slower.  Searching is typically not dominated by dictionary lookup, so
  +   * tweaking this is rarely useful.*/
  +  int indexInterval = 128;
  +
  +  /** Expert: The fraction of {@link TermDocs} entries stored in skip tables,
  +   * used to accelerate {@link TermDocs#skipTo(int)}.  Larger values result in
  +   * smaller indexes, greater acceleration, but fewer accelerable cases, while
  +   * smaller values result in bigger indexes, less acceleration and more
  +   * accelerable cases. More detailed experiments would be useful here. */
  +  int skipInterval = 16;
  +
     private long lastIndexPointer = 0;
     private boolean isIndex = false;
   
  @@ -91,7 +114,12 @@
       fieldInfos = fis;
       isIndex = isi;
       output = directory.createFile(segment + (isIndex ? ".tii" : ".tis"));
  -    output.writeInt(0);				  // leave space for size
  +    output.writeInt(FORMAT);                      // write format
  +    output.writeLong(0);                          // leave space for size
  +    if (!isIndex) {
  +      output.writeInt(indexInterval);             // write indexInterval
  +      output.writeInt(skipInterval);              // write skipInterval
  +    }
     }
   
     /** Adds a new <Term, TermInfo> pair to the set.
  @@ -106,7 +134,7 @@
       if (ti.proxPointer < lastTi.proxPointer)
         throw new IOException("proxPointer out of order");
   
  -    if (!isIndex && size % INDEX_INTERVAL == 0)
  +    if (!isIndex && size % indexInterval == 0)
         other.add(lastTerm, lastTi);		  // add an index term
   
       writeTerm(term);				  // write term
  @@ -114,6 +142,12 @@
       output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
       output.writeVLong(ti.proxPointer - lastTi.proxPointer);
   
  +    if (!isIndex) {
  +      if (ti.docFreq > skipInterval) {
  +        output.writeVInt(ti.skipOffset);
  +      }
  +    }
  +
       if (isIndex) {
         output.writeVLong(other.output.getFilePointer() - lastIndexPointer);
         lastIndexPointer = other.output.getFilePointer(); // write pointer
  @@ -149,8 +183,8 @@
   
     /** Called to complete TermInfos creation. */
     final void close() throws IOException {
  -    output.seek(0);				  // write size at start
  -    output.writeInt(size);
  +    output.seek(4);				  // write size after format
  +    output.writeLong(size);
       output.close();
   
       if (!isIndex)
  
  
  
  1.18      +31 -0     jakarta-lucene/src/java/org/apache/lucene/search/BooleanQuery.java
  
  Index: BooleanQuery.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/BooleanQuery.java,v
  retrieving revision 1.17
  retrieving revision 1.18
  diff -u -r1.17 -r1.18
  --- BooleanQuery.java	25 Nov 2003 21:16:36 -0000	1.17
  +++ BooleanQuery.java	15 Jan 2004 22:42:32 -0000	1.18
  @@ -158,6 +158,37 @@
       }
   
       public Scorer scorer(IndexReader reader) throws IOException {
  +      // First see if the (faster) ConjunctionScorer will work.  This can be
  +      // used when all clauses are required.  Also, at this point a
  +      // BooleanScorer cannot be embedded in a ConjunctionScorer, as the hits
  +      // from a BooleanScorer are not always sorted by document number (sigh)
  +      // and hence BooleanScorer cannot implement skipTo() correctly, which is
  +      // required by ConjunctionScorer.
  +      boolean allRequired = true;      
  +      boolean noneBoolean = true;
  +      for (int i = 0 ; i < weights.size(); i++) {
  +        BooleanClause c = (BooleanClause)clauses.elementAt(i);
  +        if (!c.required)
  +          allRequired = false;
  +        if (c.query instanceof BooleanQuery)
  +          noneBoolean = false;
  +      }
  +
  +      if (allRequired && noneBoolean) {           // ConjunctionScorer is okay
  +        ConjunctionScorer result =
  +          new ConjunctionScorer(searcher.getSimilarity());
  +        for (int i = 0 ; i < weights.size(); i++) {
  +          BooleanClause c = (BooleanClause)clauses.elementAt(i);
  +          Weight w = (Weight)weights.elementAt(i);
  +          Scorer subScorer = w.scorer(reader);
  +          if (subScorer == null)
  +            return null;
  +          result.add(subScorer);
  +        }
  +        return result;
  +      }
  +
  +      // Use good-old BooleanScorer instead.
         BooleanScorer result = new BooleanScorer(searcher.getSimilarity());
   
         for (int i = 0 ; i < weights.size(); i++) {
  
  
  
  1.5       +48 -12    jakarta-lucene/src/java/org/apache/lucene/search/BooleanScorer.java
  
  Index: BooleanScorer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/BooleanScorer.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- BooleanScorer.java	15 Jan 2003 19:25:04 -0000	1.4
  +++ BooleanScorer.java	15 Jan 2004 22:42:32 -0000	1.5
  @@ -76,14 +76,17 @@
   
     static final class SubScorer {
       public Scorer scorer;
  +    public boolean done;
       public boolean required = false;
       public boolean prohibited = false;
       public HitCollector collector;
       public SubScorer next;
   
       public SubScorer(Scorer scorer, boolean required, boolean prohibited,
  -		     HitCollector collector, SubScorer next) {
  +		     HitCollector collector, SubScorer next)
  +      throws IOException {
         this.scorer = scorer;
  +      this.done = !scorer.next();
         this.required = required;
         this.prohibited = prohibited;
         this.collector = collector;
  @@ -91,7 +94,8 @@
       }
     }
   
  -  final void add(Scorer scorer, boolean required, boolean prohibited) {
  +  final void add(Scorer scorer, boolean required, boolean prohibited)
  +    throws IOException {
       int mask = 0;
       if (required || prohibited) {
         if (nextMask == 0)
  @@ -120,17 +124,45 @@
         coordFactors[i] = getSimilarity().coord(i, maxCoord-1);
     }
   
  -  public final void score(HitCollector results, int maxDoc)
  -    throws IOException {
  +  private int end;
  +  private Bucket current;
  +
  +  public int doc() { return current.doc; }
  +
  +  public boolean next() throws IOException {
  +    boolean more = false;
  +    do {
  +      while (bucketTable.first != null) {         // more queued
  +        current = bucketTable.first;
  +        bucketTable.first = current.next;         // pop the queue
  +
  +        // check prohibited & required
  +        if ((current.bits & prohibitedMask) == 0 && 
  +            (current.bits & requiredMask) == requiredMask) {
  +          return true;
  +        }
  +      }
  +
  +      // refill the queue
  +      end += BucketTable.SIZE;
  +      for (SubScorer sub = scorers; sub != null; sub = sub.next) {
  +        Scorer scorer = sub.scorer;
  +        while (!sub.done && scorer.doc() < end) {
  +          sub.collector.collect(scorer.doc(), scorer.score());
  +          sub.done = !scorer.next();
  +        }
  +        if (!sub.done) {
  +          more  = true;
  +        }
  +      }
  +    } while (bucketTable.first != null | more);
  +    return false;
  +  }
  +
  +  public float score() throws IOException {
       if (coordFactors == null)
         computeCoordFactors();
  -
  -    while (currentDoc < maxDoc) {
  -      currentDoc = Math.min(currentDoc+BucketTable.SIZE, maxDoc);
  -      for (SubScorer t = scorers; t != null; t = t.next)
  -	t.scorer.score(t.collector, currentDoc);
  -      bucketTable.collectHits(results);
  -    }
  +    return current.score * coordFactors[current.coord];
     }
   
     static final class Bucket {
  @@ -196,7 +228,7 @@
   	bucket.score = score;			  // initialize score
   	bucket.bits = mask;			  // initialize mask
   	bucket.coord = 1;			  // initialize coord
  -	
  +
   	bucket.next = table.first;		  // push onto valid list
   	table.first = bucket;
         } else {					  // valid bucket
  @@ -205,6 +237,10 @@
   	bucket.coord++;				  // increment coord
         }
       }
  +  }
  +
  +  public boolean skipTo(int target) throws IOException {
  +    throw new UnsupportedOperationException();
     }
   
     public Explanation explain(int doc) throws IOException {
  
  
  
  1.12      +2 -2      jakarta-lucene/src/java/org/apache/lucene/search/IndexSearcher.java
  
  Index: IndexSearcher.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/IndexSearcher.java,v
  retrieving revision 1.11
  retrieving revision 1.12
  diff -u -r1.11 -r1.12
  --- IndexSearcher.java	16 Sep 2003 20:06:32 -0000	1.11
  +++ IndexSearcher.java	15 Jan 2004 22:42:32 -0000	1.12
  @@ -140,7 +140,7 @@
               hq.insert(new ScoreDoc(doc, score));
   	  }
   	}
  -      }, reader.maxDoc());
  +      });
   
       ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
       for (int i = hq.size()-1; i >= 0; i--)	  // put docs in array
  @@ -180,7 +180,7 @@
       Scorer scorer = query.weight(this).scorer(reader);
       if (scorer == null)
         return;
  -    scorer.score(collector, reader.maxDoc());
  +    scorer.score(collector);
     }
   
     public Query rewrite(Query original) throws IOException {
  
  
  
  1.2       +15 -3     jakarta-lucene/src/java/org/apache/lucene/search/PhrasePositions.java
  
  Index: PhrasePositions.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/PhrasePositions.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- PhrasePositions.java	18 Sep 2001 16:29:57 -0000	1.1
  +++ PhrasePositions.java	15 Jan 2004 22:42:32 -0000	1.2
  @@ -68,18 +68,30 @@
     PhrasePositions(TermPositions t, int o) throws IOException {
       tp = t;
       offset = o;
  -    next();
     }
   
  -  final void next() throws IOException {	  // increments to next doc
  +  final boolean next() throws IOException {	  // increments to next doc
       if (!tp.next()) {
         tp.close();				  // close stream
         doc = Integer.MAX_VALUE;			  // sentinel value
  -      return;
  +      return false;
       }
       doc = tp.doc();
       position = 0;
  +    return true;
     }
  +
  +  final boolean skipTo(int target) throws IOException {
  +    if (!tp.skipTo(target)) {
  +      tp.close();				  // close stream
  +      doc = Integer.MAX_VALUE;			  // sentinel value
  +      return false;
  +    }
  +    doc = tp.doc();
  +    position = 0;
  +    return true;
  +  }
  +
   
     final void firstPosition() throws IOException {
       count = tp.freq();				  // read first pos
  
  
  
  1.9       +120 -82   jakarta-lucene/src/java/org/apache/lucene/search/PhraseScorer.java
  
  Index: PhraseScorer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/PhraseScorer.java,v
  retrieving revision 1.8
  retrieving revision 1.9
  diff -u -r1.8 -r1.9
  --- PhraseScorer.java	11 Sep 2003 01:25:47 -0000	1.8
  +++ PhraseScorer.java	15 Jan 2004 22:42:32 -0000	1.9
  @@ -60,89 +60,127 @@
   import org.apache.lucene.index.*;
   
   abstract class PhraseScorer extends Scorer {
  -    private Weight weight;
  -    protected byte[] norms;
  -    protected float value;
  -
  -    protected PhraseQueue pq;
  -    protected PhrasePositions first, last;
  -
  -    private float freq;
  -
  -    PhraseScorer(Weight weight, TermPositions[] tps, Similarity similarity,
  -                 byte[] norms) throws IOException {
  -        super(similarity);
  -        this.norms = norms;
  -        this.weight = weight;
  -        this.value = weight.getValue();
  -
  -        // use PQ to build a sorted list of PhrasePositions
  -        pq = new PhraseQueue(tps.length);
  -        for (int i = 0; i < tps.length; i++) {
  -            pq.put(new PhrasePositions(tps[i], i));
  -        }
  -        pqToList();
  -    }
  -
  -    public final void score(HitCollector results, int end) throws IOException {
  -        Similarity similarity = getSimilarity();
  -        while (last.doc < end) {			  // find doc w/ all the terms
  -            while (first.doc < last.doc) {		  // scan forward in first
  -                do {
  -                    first.next();
  -                } while (first.doc < last.doc);
  -                firstToLast();
  -                if (last.doc >= end)
  -                    return;
  -            }
  -
  -            // found doc with all terms
  -            freq = phraseFreq();                        // check for phrase
  -
  -            if (freq > 0.0) {
  -                float score = similarity.tf(freq) * value;  // compute score
  -                score *= Similarity.decodeNorm(norms[first.doc]); // normalize
  -                results.collect(first.doc, score);	  // add to results
  -            }
  -            last.next();				  // resume scanning
  -        }
  -    }
  -
  -    protected abstract float phraseFreq() throws IOException;
  -
  -    protected final void pqToList() {
  -        last = first = null;
  -        while (pq.top() != null) {
  -            PhrasePositions pp = (PhrasePositions) pq.pop();
  -            if (last != null) {			  // add next to end of list
  -                last.next = pp;
  -            } else
  -                first = pp;
  -            last = pp;
  -            pp.next = null;
  -        }
  -    }
  -
  -    protected final void firstToLast() {
  -        last.next = first;			  // move first to end of list
  -        last = first;
  -        first = first.next;
  -        last.next = null;
  -    }
  -
  -    public Explanation explain(final int doc) throws IOException {
  -        Explanation tfExplanation = new Explanation();
  -
  -        score(new HitCollector() {
  -            public final void collect(int d, float score) {
  -            }
  -        }, doc + 1);
  -
  -        float phraseFreq = (first.doc == doc) ? freq : 0.0f;
  -        tfExplanation.setValue(getSimilarity().tf(phraseFreq));
  -        tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")");
  +  private Weight weight;
  +  protected byte[] norms;
  +  protected float value;
  +
  +  private boolean firstTime = true;
  +  private boolean more = true;
  +  protected PhraseQueue pq;
  +  protected PhrasePositions first, last;
  +
  +  private float freq;
  +
  +  PhraseScorer(Weight weight, TermPositions[] tps, Similarity similarity,
  +               byte[] norms) throws IOException {
  +    super(similarity);
  +    this.norms = norms;
  +    this.weight = weight;
  +    this.value = weight.getValue();
  +
  +    // convert tps to a list
  +    for (int i = 0; i < tps.length; i++) {
  +      PhrasePositions pp = new PhrasePositions(tps[i], i);
  +      if (last != null) {			  // add next to end of list
  +        last.next = pp;
  +      } else
  +        first = pp;
  +      last = pp;
  +    }
  +
  +    pq = new PhraseQueue(tps.length);             // construct empty pq
  +
  +  }
  +
  +  public int doc() { return first.doc; }
   
  -        return tfExplanation;
  +  public boolean next() throws IOException {
  +    if (firstTime) {
  +      sort();
  +      firstTime = false;
  +    } else if (more) {
  +      more = last.next();                         // trigger further scanning
       }
  +
  +    while (more) {
  +      while (more && first.doc < last.doc) {      // find doc w/ all the terms
  +        more = first.skipTo(last.doc);            // skip first upto last
  +        firstToLast();                            // and move it to the end
  +      }
  +
  +      if (more) {
  +        // found a doc with all of the terms
  +        freq = phraseFreq();                      // check for phrase
  +        if (freq == 0.0f)                         // no match
  +          more = last.next();                     // trigger further scanning
  +        else
  +          return true;                            // found a match
  +      }
  +    }
  +    return false;                                 // no more matches
  +  }
  +
  +  public float score() throws IOException {
  +    //System.out.println("scoring " + first.doc);
  +    float raw = getSimilarity().tf(freq) * value; // raw score
  +    return raw * Similarity.decodeNorm(norms[first.doc]); // normalize
  +  }
  +
  +  public boolean skipTo(int target) throws IOException {
  +    for (PhrasePositions pp = first; more && pp != null; pp = pp.next) {
  +      more = pp.skipTo(target);
  +    }
  +    if (more)
  +      sort();                                     // re-sort
  +    return more;
  +  }
  +
  +
  +  protected abstract float phraseFreq() throws IOException;
  +
  +  private void sort() throws IOException {
  +    pq.clear();
  +    for (PhrasePositions pp = first; more && pp != null; pp = pp.next) {
  +      more = pp.next();
  +      if (more) {
  +        pq.put(pp);
  +      } else {
  +        return;
  +      }
  +    }
  +    pqToList();
  +  }
  +
  +  protected final void pqToList() {
  +    last = first = null;
  +    while (pq.top() != null) {
  +      PhrasePositions pp = (PhrasePositions) pq.pop();
  +      if (last != null) {			  // add next to end of list
  +        last.next = pp;
  +      } else
  +        first = pp;
  +      last = pp;
  +      pp.next = null;
  +    }
  +  }
  +
  +  protected final void firstToLast() {
  +    last.next = first;			  // move first to end of list
  +    last = first;
  +    first = first.next;
  +    last.next = null;
  +  }
  +
  +  public Explanation explain(final int doc) throws IOException {
  +    Explanation tfExplanation = new Explanation();
  +
  +    while (next() && doc() < doc) {}
  +
  +    float phraseFreq = (doc() == doc) ? freq : 0.0f;
  +    tfExplanation.setValue(getSimilarity().tf(phraseFreq));
  +    tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")");
  +
  +    return tfExplanation;
  +  }
   
   }
  
  
  
  1.4       +32 -4     jakarta-lucene/src/java/org/apache/lucene/search/Scorer.java
  
  Index: Scorer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/Scorer.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- Scorer.java	13 Jan 2003 23:50:33 -0000	1.3
  +++ Scorer.java	15 Jan 2004 22:42:32 -0000	1.4
  @@ -70,11 +70,39 @@
       return this.similarity;
     }
   
  -  /** Scores hits and passes them to a collector.  Stops at the last document
  -   * before <code>maxDoc</code>.  If called repeatedly, will restart at point
  -   * where it last left off.
  +  /** Scores all documents and passes them to a collector. */
  +  public void score(HitCollector hc) throws IOException {
  +    while (next()) {
  +      hc.collect(doc(), score());
  +    }
  +  }
  +
  +  /** Advance to the next document matching the query.  Returns true iff there
  +   * is another match. */
  +  public abstract boolean next() throws IOException;
  +
  +  /** Returns the current document number.  Initially invalid, until {@link
  +   * #next()} is called the first time. */
  +  public abstract int doc();
  +
  +  /** Returns the score of the current document.  Initially invalid, until
  +   * {@link #next()} is called the first time. */
  +  public abstract float score() throws IOException;
  +
  +  /** Skips to the first match beyond the current whose document number is
  +   * greater than or equal to <i>target</i>. <p>Returns true iff there is such
  +   * a match.  <p>Behaves as if written: <pre>
  +   *   boolean skipTo(int target) {
  +   *     do {
  +   *       if (!next())
  +   * 	     return false;
  +   *     } while (target > doc());
  +   *     return true;
  +   *   }
  +   * </pre>
  +   * Most implementations are considerably more efficient than that.
      */
  -  public abstract void score(HitCollector hc, int maxDoc) throws IOException;
  +  public abstract boolean skipTo(int target) throws IOException;
   
     /** Returns an explanation of the score for <code>doc</code>. */
     public abstract Explanation explain(int doc) throws IOException;
  
  
  
  1.7       +45 -33    jakarta-lucene/src/java/org/apache/lucene/search/TermScorer.java
  
  Index: TermScorer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/TermScorer.java,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- TermScorer.java	29 Jan 2003 17:18:55 -0000	1.6
  +++ TermScorer.java	15 Jan 2004 22:42:32 -0000	1.7
  @@ -83,44 +83,56 @@
   
       for (int i = 0; i < SCORE_CACHE_SIZE; i++)
         scoreCache[i] = getSimilarity().tf(i) * weightValue;
  +  }
   
  -    pointerMax = termDocs.read(docs, freqs);	  // fill buffers
  +  public int doc() { return doc; }
   
  -    if (pointerMax != 0)
  -      doc = docs[0];
  -    else {
  -      termDocs.close();				  // close stream
  -      doc = Integer.MAX_VALUE;			  // set to sentinel value
  -    }
  +  public boolean next() throws IOException {
  +    pointer++;
  +    if (pointer >= pointerMax) {
  +      pointerMax = termDocs.read(docs, freqs);    // refill buffer
  +      if (pointerMax != 0) {
  +        pointer = 0;
  +      } else {
  +        termDocs.close();			  // close stream
  +        doc = Integer.MAX_VALUE;		  // set to sentinel value
  +        return false;
  +      }
  +    } 
  +    doc = docs[pointer];
  +    return true;
     }
   
  -  public final void score(HitCollector c, final int end) throws IOException {
  -    int d = doc;				  // cache doc in local
  -    Similarity similarity = getSimilarity();      // cache sim in local
  -    while (d < end) {				  // for docs in window
  -      final int f = freqs[pointer];
  -      float score =				  // compute tf(f)*weight
  -	f < SCORE_CACHE_SIZE			  // check cache
  -	 ? scoreCache[f]			  // cache hit
  -	 : similarity.tf(f)*weightValue;          // cache miss
  -
  -      score *= Similarity.decodeNorm(norms[d]);	  // normalize for field
  -
  -      c.collect(d, score);			  // collect score
  -
  -      if (++pointer == pointerMax) {
  -	pointerMax = termDocs.read(docs, freqs);  // refill buffers
  -	if (pointerMax != 0) {
  -	  pointer = 0;
  -	} else {
  -	  termDocs.close();			  // close stream
  -	  doc = Integer.MAX_VALUE;		  // set to sentinel value
  -	  return;
  -	}
  -      } 
  -      d = docs[pointer];
  +  public float score() throws IOException {
  +    int f = freqs[pointer];
  +    float raw =                                   // compute tf(f)*weight
  +      f < SCORE_CACHE_SIZE			  // check cache
  +      ? scoreCache[f]                             // cache hit
  +      : getSimilarity().tf(f)*weightValue;        // cache miss
  +
  +    return raw * Similarity.decodeNorm(norms[doc]); // normalize for field
  +  }
  +
  +  public boolean skipTo(int target) throws IOException {
  +    // first scan in cache
  +    for (pointer++; pointer < pointerMax; pointer++) {
  +      if (!(target > docs[pointer])) {
  +        doc = docs[pointer];
  +        return true;
  +      }
  +    }
  +
  +    // not found in cache, seek underlying stream
  +    boolean result = termDocs.skipTo(target);
  +    if (result) {
  +      pointerMax = 1;
  +      pointer = 0;
  +      docs[pointer] = doc = termDocs.doc();
  +      freqs[pointer] = termDocs.freq();
  +    } else {
  +      doc = Integer.MAX_VALUE;
       }
  -    doc = d;					  // flush cache
  +    return result;
     }
   
     public Explanation explain(int doc) throws IOException {
  
  
  
  1.1                  jakarta-lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java
  
  Index: ConjunctionScorer.java
  ===================================================================
  package org.apache.lucene.search;
  
  /* ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2004 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache" and "Apache Software Foundation" and
   *    "Apache Lucene" must not be used to endorse or promote products
   *    derived from this software without prior written permission. For
   *    written permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    "Apache Lucene", nor may "Apache" appear in their name, without
   *    prior written permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  import java.io.IOException;
  import java.util.*;
  import org.apache.lucene.index.*;
  
  /** Scorer for conjunctions, sets of queries, all of which are required.
   *
   * <p>The sub-scorers are held in a list that, once iteration has started, is
   * kept ordered by current document number, so that {@link #first()} is the
   * scorer furthest behind and {@link #last()} the one furthest ahead.  A
   * document matches when the first and last scorers are on the same document.
   */
  final class ConjunctionScorer extends Scorer {
    private LinkedList scorers = new LinkedList(); // required sub-scorers
    private boolean firstTime = true;              // true until init() has run
    private boolean more = true;                   // false once any clause is exhausted
    private float coord;                           // coord factor, set by init()

    public ConjunctionScorer(Similarity similarity) {
      super(similarity);
    }

    /** Adds a required clause.  Expected to be called before iteration starts. */
    final void add(Scorer scorer) throws IOException {
      scorers.addLast(scorer);
    }

    private Scorer first() { return (Scorer)scorers.getFirst(); }
    private Scorer last() { return (Scorer)scorers.getLast(); }

    /** Returns the current document.  Only valid after {@link #next()} or
     * {@link #skipTo(int)} has returned true. */
    public int doc() { return first().doc(); }

    /** Advances to the next document matched by every clause.
     * @return true iff there is such a document */
    public boolean next() throws IOException {
      if (firstTime) {
        init(true);
      } else if (more) {
        more = last().next();                       // trigger further scanning
      }
      return doNext();
    }

    /** Advances the scorer that is furthest behind until all scorers are
     * positioned on the same document, or one of them is exhausted.
     * @return true iff all scorers are on a common document */
    private boolean doNext() throws IOException {
      while (more && first().doc() < last().doc()) { // find doc w/ all clauses
        more = first().skipTo(last().doc());        // skip first up to last
        scorers.addLast(scorers.removeFirst());     // move first to last
      }
      return more;                                  // found a doc with all clauses
    }

    /** Skips every clause to <i>target</i> and then advances to the first
     * document at or after it that is matched by all clauses.
     * @return true iff there is such a document */
    public boolean skipTo(int target) throws IOException {
      if (firstTime) {
        // skipTo() may be the very first call: set up coord and the 'more'
        // flag, but do not advance the sub-scorers since they are skipped below
        init(false);
      }

      Iterator i = scorers.iterator();
      while (more && i.hasNext()) {
        more = ((Scorer)i.next()).skipTo(target);
      }
      if (more)
        sortScorers();                              // re-sort scorers

      // the clauses may have landed on different documents; re-align them so
      // that a true result always denotes a document matched by every clause
      return doNext();
    }

    /** Returns the sum of the clause scores for the current document,
     * multiplied by the coord factor. */
    public float score() throws IOException {
      float score = 0.0f;                           // sum scores
      Iterator i = scorers.iterator();
      while (i.hasNext())
        score += ((Scorer)i.next()).score();
      score *= coord;
      return score;
    }

    /** One-time set-up performed on the first call to next() or skipTo().
     * @param advance if true, each clause is moved to its first entry and the
     *        list is sorted; if false the caller positions the clauses itself */
    private void init(boolean advance) throws IOException {
      more = scorers.size() > 0;

      // compute coord factor
      coord = getSimilarity().coord(scorers.size(), scorers.size());

      if (advance) {
        // move each scorer to its first entry
        Iterator i = scorers.iterator();
        while (more && i.hasNext()) {
          more = ((Scorer)i.next()).next();
        }
        if (more)
          sortScorers();                            // initial sort of list
      }

      firstTime = false;
    }

    /** Re-orders the list of scorers by increasing current document number. */
    private void sortScorers() throws IOException {
      // move scorers to an array
      Scorer[] array = (Scorer[])scorers.toArray(new Scorer[scorers.size()]);
      scorers.clear();                              // empty the list

      Arrays.sort(array, new Comparator() {         // sort the array
          public int compare(Object o1, Object o2) {
            // explicit comparison rather than subtraction, which can overflow
            int d1 = ((Scorer)o1).doc();
            int d2 = ((Scorer)o2).doc();
            return d1 < d2 ? -1 : (d1 == d2 ? 0 : 1);
          }
        });

      for (int i = 0; i < array.length; i++) {
        scorers.addLast(array[i]);                  // re-build list, now sorted
      }
    }

    /** Not supported by this scorer. */
    public Explanation explain(int doc) throws IOException {
      throw new UnsupportedOperationException();
    }

  }
  
  
  
  1.12      +1 -96     jakarta-lucene/src/java/org/apache/lucene/store/RAMDirectory.java
  
  Index: RAMDirectory.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/store/RAMDirectory.java,v
  retrieving revision 1.11
  retrieving revision 1.12
  diff -u -r1.11 -r1.12
  --- RAMDirectory.java	18 Nov 2003 13:05:13 -0000	1.11
  +++ RAMDirectory.java	15 Jan 2004 22:42:34 -0000	1.12
  @@ -226,98 +226,3 @@
     public final void close() {
     }
   }
  -
  -
  -final class RAMInputStream extends InputStream implements Cloneable {
  -  RAMFile file;
  -  int pointer = 0;
  -
  -  public RAMInputStream(RAMFile f) {
  -    file = f;
  -    length = file.length;
  -  }
  -
  -  /** InputStream methods */
  -  public final void readInternal(byte[] dest, int destOffset, int len) {
  -    int remainder = len;
  -    int start = pointer;
  -    while (remainder != 0) {
  -      int bufferNumber = start/InputStream.BUFFER_SIZE;
  -      int bufferOffset = start%InputStream.BUFFER_SIZE;
  -      int bytesInBuffer = InputStream.BUFFER_SIZE - bufferOffset;
  -      int bytesToCopy = bytesInBuffer >= remainder ? remainder : bytesInBuffer;
  -      byte[] buffer = (byte[])file.buffers.elementAt(bufferNumber);
  -      System.arraycopy(buffer, bufferOffset, dest, destOffset, bytesToCopy);
  -      destOffset += bytesToCopy;
  -      start += bytesToCopy;
  -      remainder -= bytesToCopy;
  -    }
  -    pointer += len;
  -  }
  -
  -  public final void close() {
  -  }
  -
  -  /** Random-access methods */
  -  public final void seekInternal(long pos) {
  -    pointer = (int)pos;
  -  }
  -}
  -
  -
  -final class RAMOutputStream extends OutputStream {
  -  RAMFile file;
  -  int pointer = 0;
  -
  -  public RAMOutputStream(RAMFile f) {
  -    file = f;
  -  }
  -
  -  /** output methods: */
  -  public final void flushBuffer(byte[] src, int len) {
  -    int bufferNumber = pointer/OutputStream.BUFFER_SIZE;
  -    int bufferOffset = pointer%OutputStream.BUFFER_SIZE;
  -    int bytesInBuffer = OutputStream.BUFFER_SIZE - bufferOffset;
  -    int bytesToCopy = bytesInBuffer >= len ? len : bytesInBuffer;
  -
  -    if (bufferNumber == file.buffers.size())
  -      file.buffers.addElement(new byte[OutputStream.BUFFER_SIZE]);
  -
  -    byte[] buffer = (byte[])file.buffers.elementAt(bufferNumber);
  -    System.arraycopy(src, 0, buffer, bufferOffset, bytesToCopy);
  -
  -    if (bytesToCopy < len) {			  // not all in one buffer
  -      int srcOffset = bytesToCopy;
  -      bytesToCopy = len - bytesToCopy;		  // remaining bytes
  -      bufferNumber++;
  -      if (bufferNumber == file.buffers.size())
  -        file.buffers.addElement(new byte[OutputStream.BUFFER_SIZE]);
  -      buffer = (byte[])file.buffers.elementAt(bufferNumber);
  -      System.arraycopy(src, srcOffset, buffer, 0, bytesToCopy);
  -    }
  -    pointer += len;
  -    if (pointer > file.length)
  -      file.length = pointer;
  -
  -    file.lastModified = System.currentTimeMillis();
  -  }
  -
  -  public final void close() throws IOException {
  -    super.close();
  -  }
  -
  -  /** Random-access methods */
  -  public final void seek(long pos) throws IOException {
  -    super.seek(pos);
  -    pointer = (int)pos;
  -  }
  -  public final long length() throws IOException {
  -    return file.length;
  -  }
  -}
  -
  -final class RAMFile {
  -  Vector buffers = new Vector();
  -  long length;
  -  long lastModified = System.currentTimeMillis();
  -}
  
  
  
  1.1                  jakarta-lucene/src/java/org/apache/lucene/store/RAMFile.java
  
  Index: RAMFile.java
  ===================================================================
  package org.apache.lucene.store;
  
  /* ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2001, 2004 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache" and "Apache Software Foundation" and
   *    "Apache Lucene" must not be used to endorse or promote products
   *    derived from this software without prior written permission. For
   *    written permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    "Apache Lucene", nor may "Apache" appear in their name, without
   *    prior written permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  import java.util.Vector;
  
  /** Holder for the state of one memory-resident file. */
  class RAMFile {
    // chunks of file content; RAMInputStream casts each element to byte[]
    Vector buffers = new Vector();
    // copied by RAMInputStream into its own length field at construction;
    // presumably the file size in bytes -- confirm against the writer
    long length;
    // initialized to the wall-clock time at which this object is created
    long lastModified = System.currentTimeMillis();
  }
  
  
  
  1.1                  jakarta-lucene/src/java/org/apache/lucene/store/RAMInputStream.java
  
  Index: RAMInputStream.java
  ===================================================================
  package org.apache.lucene.store;
  
  /* ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2001, 2004 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache" and "Apache Software Foundation" and
   *    "Apache Lucene" must not be used to endorse or promote products
   *    derived from this software without prior written permission. For
   *    written permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    "Apache Lucene", nor may "Apache" appear in their name, without
   *    prior written permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  /**
   * A memory-resident {@link InputStream} implementation.
   *
   * @version $Id: RAMInputStream.java,v 1.1 2004/01/15 22:42:34 cutting Exp $
   */
  
  class RAMInputStream extends InputStream implements Cloneable {
    private RAMFile file;
    private int pointer = 0;                        // current read position

    /** Creates a stream over <code>f</code>, taking its length from the file. */
    public RAMInputStream(RAMFile f) {
      this.file = f;
      this.length = f.length;
    }

    /** Copies <code>len</code> bytes, starting at the current position, into
     * <code>dest</code> at <code>destOffset</code>, walking across as many of
     * the file's fixed-size buffers as necessary. */
    public void readInternal(byte[] dest, int destOffset, int len) {
      int position = pointer;
      int target = destOffset;
      for (int left = len; left != 0; ) {
        int chunkIndex = position / BUFFER_SIZE;    // which buffer to read
        int chunkStart = position % BUFFER_SIZE;    // where in that buffer
        // never read past the end of the current buffer
        int count = Math.min(BUFFER_SIZE - chunkStart, left);
        byte[] chunk = (byte[])file.buffers.elementAt(chunkIndex);
        System.arraycopy(chunk, chunkStart, dest, target, count);
        target += count;
        position += count;
        left -= count;
      }
      pointer += len;
    }

    /** Does nothing. */
    public void close() {
    }

    /** Moves the read position to <code>pos</code>, truncated to an int. */
    public void seekInternal(long pos) {
      pointer = (int)pos;
    }
  }
  
  
  
  1.1                  jakarta-lucene/src/java/org/apache/lucene/store/RAMOutputStream.java
  
  Index: RAMOutputStream.java
  ===================================================================
  package org.apache.lucene.store;
  
  /* ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2001, 2004 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache" and "Apache Software Foundation" and
   *    "Apache Lucene" must not be used to endorse or promote products
   *    derived from this software without prior written permission. For
   *    written permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    "Apache Lucene", nor may "Apache" appear in their name, without
   *    prior written permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  import java.io.IOException;
  
  /**
   * A memory-resident {@link OutputStream} implementation.
   *
   * @version $Id: RAMOutputStream.java,v 1.1 2004/01/15 22:42:34 cutting Exp $
   */
  
  public class RAMOutputStream extends OutputStream {
    private RAMFile file;
    private int pointer = 0;
  
    /** Construct an empty output buffer. */
    public RAMOutputStream() {
      this(new RAMFile());
    }
  
    /** Construct a stream that writes into the given file. */
    RAMOutputStream(RAMFile f) {
      file = f;
    }
  
    /** Copy the current contents of this buffer to the named output. */
    public void writeTo(OutputStream out) throws IOException {
      flush();
      final long total = file.length;
      int index = 0;
      for (long written = 0; written < total; written += BUFFER_SIZE) {
        long left = total - written;
        // every buffer is written in full except possibly the last one
        int chunk = left < BUFFER_SIZE ? (int)left : BUFFER_SIZE;
        out.writeBytes((byte[])file.buffers.elementAt(index++), chunk);
      }
    }
  
    /** Resets this to an empty buffer. */
    public void reset() {
      try {
        seek(0);
      } catch (IOException e) {                     // should never happen
        throw new RuntimeException(e.toString());
      }
  
      file.length = 0;
    }
  
    /**
     * Writes the first <code>len</code> bytes of <code>src</code> into the file
     * at the current pointer, then advances the pointer, extends the file
     * length if the pointer moved past it, and stamps the modification time.
     * The write may spill from the current buffer into the one after it; no
     * further spill is handled.
     */
    public void flushBuffer(byte[] src, int len) {
      int index = pointer / BUFFER_SIZE;
      int offset = pointer % BUFFER_SIZE;
  
      // first part: whatever fits in the remainder of the current buffer
      int head = Math.min(len, BUFFER_SIZE - offset);
      System.arraycopy(src, 0, bufferAt(index), offset, head);
  
      // second part: anything left over starts the following buffer
      int tail = len - head;
      if (tail > 0) {
        System.arraycopy(src, head, bufferAt(index + 1), 0, tail);
      }
  
      pointer += len;
      if (file.length < pointer)
        file.length = pointer;
  
      file.lastModified = System.currentTimeMillis();
    }
  
    /**
     * Returns buffer number <code>index</code>, first appending a fresh
     * buffer when <code>index</code> is exactly one past the last existing one.
     */
    private byte[] bufferAt(int index) {
      if (index == file.buffers.size())
        file.buffers.addElement(new byte[BUFFER_SIZE]);
      return (byte[])file.buffers.elementAt(index);
    }
  
    /** Delegates to the superclass. */
    public void close() throws IOException {
      super.close();
    }
  
    /** Seeks via the superclass, then repositions the write pointer. */
    public void seek(long pos) throws IOException {
      super.seek(pos);
      pointer = (int)pos;
    }
  
    /** Returns the length currently recorded on the underlying file. */
    public long length() {
      return file.length;
    }
  }
  
  
  
  1.7       +3 -74     jakarta-lucene/src/test/org/apache/lucene/ThreadSafetyTest.java
  
  Index: ThreadSafetyTest.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/test/org/apache/lucene/ThreadSafetyTest.java,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- ThreadSafetyTest.java	25 Sep 2003 22:01:51 -0000	1.6
  +++ ThreadSafetyTest.java	15 Jan 2004 22:42:34 -0000	1.7
  @@ -54,6 +54,7 @@
    * <http://www.apache.org/>.
    */
   
  +import org.apache.lucene.util.*;
   import org.apache.lucene.store.*;
   import org.apache.lucene.document.*;
   import org.apache.lucene.analysis.*;
  @@ -93,7 +94,7 @@
             Document d = new Document();
             int n = RANDOM.nextInt();
             d.add(Field.Keyword("id", Integer.toString(n)));
  -          d.add(Field.UnStored("contents", intToEnglish(n)));
  +          d.add(Field.UnStored("contents", English.intToEnglish(n)));
             System.out.println("Adding " + n);
             
             // Switch between single and multiple file segments
  @@ -151,7 +152,7 @@
         throws Exception {
         System.out.println("Searching for " + n);
         Hits hits =
  -        searcher.search(QueryParser.parse(intToEnglish(n), "contents",
  +        searcher.search(QueryParser.parse(English.intToEnglish(n), "contents",
                                             ANALYZER));
         System.out.println("Search for " + n + ": total=" + hits.length());
         for (int j = 0; j < Math.min(3, hits.length()); j++) {
  @@ -196,77 +197,5 @@
   
       SearcherThread searcherThread3 = new SearcherThread(true);
       searcherThread3.start();
  -  }
  -
  -  private static String intToEnglish(int i) {
  -    StringBuffer result = new StringBuffer();
  -    intToEnglish(i, result);
  -    return result.toString();
  -  }
  -
  -  private static void intToEnglish(int i, StringBuffer result) {
  -    if (i < 0) {
  -      result.append("minus ");
  -      i = -i;
  -    }
  -    if (i >= 1000000000) {			  // billions
  -      intToEnglish(i/1000000000, result);
  -      result.append("billion, ");
  -      i = i%1000000000;
  -    }
  -    if (i >= 1000000) {				  // millions
  -      intToEnglish(i/1000000, result);
  -      result.append("million, ");
  -      i = i%1000000;
  -    }
  -    if (i >= 1000) {				  // thousands
  -      intToEnglish(i/1000, result);
  -      result.append("thousand, ");
  -      i = i%1000;
  -    }
  -    if (i >= 100) {				  // hundreds
  -      intToEnglish(i/100, result);
  -      result.append("hundred ");
  -      i = i%100;
  -    }
  -    if (i >= 20) {
  -      switch (i/10) {
  -      case 9 : result.append("ninety"); break;
  -      case 8 : result.append("eighty"); break;
  -      case 7 : result.append("seventy"); break;
  -      case 6 : result.append("sixty"); break;
  -      case 5 : result.append("fifty"); break;
  -      case 4 : result.append("forty"); break;
  -      case 3 : result.append("thirty"); break;
  -      case 2 : result.append("twenty"); break;
  -      }
  -      i = i%10;
  -      if (i == 0)
  -        result.append(" ");
  -      else 
  -        result.append("-");
  -    }
  -    switch (i) {
  -    case 19 : result.append("nineteen "); break;
  -    case 18 : result.append("eighteen "); break;
  -    case 17 : result.append("seventeen "); break;
  -    case 16 : result.append("sixteen "); break;
  -    case 15 : result.append("fifteen "); break;
  -    case 14 : result.append("fourteen "); break;
  -    case 13 : result.append("thirteen "); break;
  -    case 12 : result.append("twelve "); break;
  -    case 11 : result.append("eleven "); break;
  -    case 10 : result.append("ten "); break;
  -    case 9 : result.append("nine "); break;
  -    case 8 : result.append("eight "); break;
  -    case 7 : result.append("seven "); break;
  -    case 6 : result.append("six "); break;
  -    case 5 : result.append("five "); break;
  -    case 4 : result.append("four "); break;
  -    case 3 : result.append("three "); break;
  -    case 2 : result.append("two "); break;
  -    case 1 : result.append("one "); break;
  -    case 0 : result.append(""); break;
  -    }
     }
   }
  
  
  
  1.1                  jakarta-lucene/src/test/org/apache/lucene/search/TestBasics.java
  
  Index: TestBasics.java
  ===================================================================
  package org.apache.lucene.search;
  
  /* ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2001, 2004 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache" and "Apache Software Foundation" and
   *    "Apache Lucene" must not be used to endorse or promote products
   *    derived from this software without prior written permission. For
   *    written permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    "Apache Lucene", nor may "Apache" appear in their name, without
   *    prior written permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  import junit.framework.TestCase;
  import org.apache.lucene.util.English;
  import org.apache.lucene.analysis.SimpleAnalyzer;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.Field;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.index.Term;
  import org.apache.lucene.store.RAMDirectory;
  
  /**
   * Tests basic search capabilities.
   *
   * @author Doug Cutting
   */
  public class TestBasics extends TestCase {
    private IndexSearcher searcher;
  
    /**
     * Builds an in-memory index of 1000 documents whose single "field" holds
     * the English spelling of 0..999, and opens a searcher over it.
     */
    public void setUp() throws Exception {
      RAMDirectory directory = new RAMDirectory();
      IndexWriter writer
        = new IndexWriter(directory, new SimpleAnalyzer(), true);
      //writer.infoStream = System.out;
      for (int i = 0; i < 1000; i++) {
        Document doc = new Document();
        doc.add(Field.Text("field", English.intToEnglish(i)));
        writer.addDocument(doc);
      }
  
      writer.close();
  
      searcher = new IndexSearcher(directory);
    }
  
    /** Closes the searcher opened by {@link #setUp()} so it is not leaked per test. */
    public void tearDown() throws Exception {
      if (searcher != null) {                       // setUp may have failed early
        searcher.close();
        searcher = null;
      }
    }
  
    /** "seventy" is spelled in every number whose tens digit is 7: 100 of 0..999. */
    public void testTerm() throws Exception {
      Query query = new TermQuery(new Term("field", "seventy"));
      Hits hits = searcher.search(query);
      assertEquals(100, hits.length());
    }
  
    /** A term that was never indexed matches nothing. */
    public void testTerm2() throws Exception {
      Query query = new TermQuery(new Term("field", "seventish"));
      Hits hits = searcher.search(query);
      assertEquals(0, hits.length());
    }
  
    /** The phrase "seventy seven" is expected in the ten numbers ending in 77. */
    public void testPhrase() throws Exception {
      PhraseQuery query = new PhraseQuery();
      query.add(new Term("field", "seventy"));
      query.add(new Term("field", "seven"));
      Hits hits = searcher.search(query);
      assertEquals(10, hits.length());
    }
  
    /** A phrase built from terms that were never indexed matches nothing. */
    public void testPhrase2() throws Exception {
      PhraseQuery query = new PhraseQuery();
      query.add(new Term("field", "seventish"));
      query.add(new Term("field", "sevenon"));
      Hits hits = searcher.search(query);
      assertEquals(0, hits.length());
    }
  
    /**
     * Both terms required: tens digit 7 and a "seven" elsewhere, i.e. ten
     * numbers ending in 77 plus ten numbers 770..779, minus 777 counted twice.
     */
    public void testBoolean() throws Exception {
      BooleanQuery query = new BooleanQuery();
      query.add(new TermQuery(new Term("field", "seventy")), true, false);
      query.add(new TermQuery(new Term("field", "seven")), true, false);
      Hits hits = searcher.search(query);
      assertEquals(19, hits.length());
    }
  
    /** Required clauses on terms that were never indexed match nothing. */
    public void testBoolean2() throws Exception {
      BooleanQuery query = new BooleanQuery();
      query.add(new TermQuery(new Term("field", "sevento")), true, false);
      query.add(new TermQuery(new Term("field", "sevenly")), true, false);
      Hits hits = searcher.search(query);
      assertEquals(0, hits.length());
    }
  
  }
  
  
  
  1.1                  jakarta-lucene/src/test/org/apache/lucene/util/English.java
  
  Index: English.java
  ===================================================================
  package org.apache.lucene.util;
  
  /* ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2001, 2004 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache" and "Apache Software Foundation" and
   *    "Apache Lucene" must not be used to endorse or promote products
   *    derived from this software without prior written permission. For
   *    written permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    "Apache Lucene", nor may "Apache" appear in their name, without
   *    prior written permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  
  /**
   * Spells integers out in English words, e.g. 21 becomes "twenty-one ".
   * Used by the tests to generate predictable document text.
   */
  public class English {
  
    /** Words for 0..19, indexed by value; zero is empty because it is never spoken inside a larger number. */
    private static final String[] SMALL = {
      "", "one", "two", "three", "four", "five", "six", "seven", "eight",
      "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
      "sixteen", "seventeen", "eighteen", "nineteen"
    };
  
    /** Words for the multiples of ten, indexed by the tens digit (entries 0 and 1 are unused). */
    private static final String[] TENS = {
      "", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy",
      "eighty", "ninety"
    };
  
    /**
     * Returns the English spelling of <code>i</code>.  Zero yields "zero";
     * every other value ends with a trailing space or ", " separator, and
     * negative values are prefixed with "minus ".
     */
    public static String intToEnglish(int i) {
      StringBuffer result = new StringBuffer();
      intToEnglish(i, result);
      return result.toString();
    }
  
    /**
     * Appends the English spelling of <code>i</code> to <code>result</code>.
     * See {@link #intToEnglish(int)} for the output format.
     */
    public static void intToEnglish(int i, StringBuffer result) {
      if (i == 0) {
        result.append("zero");
        return;
      }
      // Widen before negating: -Integer.MIN_VALUE overflows in int arithmetic
      // and would otherwise leave only "minus " in the output.
      long magnitude = i;
      if (magnitude < 0) {
        result.append("minus ");
        magnitude = -magnitude;
      }
      appendMagnitude(magnitude, result);
    }
  
    /** Appends the words for a non-negative value, largest unit first. */
    private static void appendMagnitude(long n, StringBuffer result) {
      if (n >= 1000000000L) {                       // billions
        appendMagnitude(n / 1000000000L, result);
        result.append("billion, ");
        n %= 1000000000L;
      }
      if (n >= 1000000L) {                          // millions
        appendMagnitude(n / 1000000L, result);
        result.append("million, ");
        n %= 1000000L;
      }
      if (n >= 1000L) {                             // thousands
        appendMagnitude(n / 1000L, result);
        result.append("thousand, ");
        n %= 1000L;
      }
      if (n >= 100L) {                              // hundreds
        appendMagnitude(n / 100L, result);
        result.append("hundred ");
        n %= 100L;
      }
      if (n >= 20L) {                               // tens, hyphenated to any unit
        result.append(TENS[(int)(n / 10L)]);
        n %= 10L;
        result.append(n == 0L ? " " : "-");
      }
      if (n > 0L) {                                 // 1..19
        result.append(SMALL[(int)n]).append(' ');
      }
    }
  
    /** Prints the English spelling of the integer given as the first argument. */
    public static void main(String[] args) {
      System.out.println(intToEnglish(Integer.parseInt(args[0])));
    }
  
  }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message