lucene-dev mailing list archives

From: cutt...@apache.org
Subject: cvs commit: jakarta-lucene/src/test/org/apache/lucene/index TestFilterIndexReader.java
Date: Tue, 21 Oct 2003 17:59:17 GMT
cutting     2003/10/21 10:59:17

  Modified:    .        CHANGES.txt
               src/java/org/apache/lucene/index FieldInfos.java
                        IndexReader.java IndexWriter.java
                        MultipleTermPositions.java SegmentMergeInfo.java
                        SegmentMerger.java SegmentReader.java
                        SegmentTermDocs.java SegmentTermPositions.java
                        SegmentsReader.java TermDocs.java
  Added:       src/java/org/apache/lucene/index FilterIndexReader.java
               src/test/org/apache/lucene/index TestFilterIndexReader.java
  Log:
  Changed IndexReader so that it can be subclassed.
  
  Revision  Changes    Path
  1.55      +13 -1     jakarta-lucene/CHANGES.txt
  
  Index: CHANGES.txt
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/CHANGES.txt,v
  retrieving revision 1.54
  retrieving revision 1.55
  diff -u -r1.54 -r1.55
  --- CHANGES.txt	3 Oct 2003 15:16:24 -0000	1.54
  +++ CHANGES.txt	21 Oct 2003 17:59:16 -0000	1.55
  @@ -40,6 +40,18 @@
   
   10. Added Locale setting to QueryParser, for use by date range parsing.
   
  +11. Changed IndexReader so that it can be subclassed by classes
  +    outside of its package.  Previously it had package-private
  +    abstract methods.  Also modified the index merging code so that it
  +    can work on an arbitrary IndexReader implementation, and added a
  +    new method, IndexWriter.addIndexes(IndexReader[]), to take
  +    advantage of this. (cutting)
  +
  +12. Added a limit to the number of clauses which may be added to a
  +    BooleanQuery.  The default limit is 1024 clauses.  This should
  +    stop most OutOfMemoryExceptions by prefix, wildcard and fuzzy
  +    queries which run amok. (cutting)
  +
   
   1.3 RC1
   
  
  
  
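  For illustration, a minimal sketch of how an application might call the new
  IndexWriter.addIndexes(IndexReader[]) method described in entry 11 above.
  The directory paths and choice of analyzer are assumptions for the example,
  not part of this commit.

   import java.io.IOException;

   import org.apache.lucene.analysis.WhitespaceAnalyzer;
   import org.apache.lucene.index.IndexReader;
   import org.apache.lucene.index.IndexWriter;
   import org.apache.lucene.store.Directory;
   import org.apache.lucene.store.FSDirectory;

   public class MergeIndexesExample {
     public static void main(String[] args) throws IOException {
       // Hypothetical paths; any existing Lucene indexes would do.
       IndexReader[] readers = new IndexReader[] {
         IndexReader.open("/tmp/indexA"),
         IndexReader.open("/tmp/indexB")
       };

       // Create (or overwrite) the destination index and merge into it.
       Directory dest = FSDirectory.getDirectory("/tmp/merged", true);
       IndexWriter writer = new IndexWriter(dest, new WhitespaceAnalyzer(), true);
       writer.addIndexes(readers);     // optimizes the result when it completes
       writer.close();
       // Note: SegmentMerger.merge() closes the readers that were added,
       // so they are not closed again here.
     }
   }
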
  1.4       +6 -5      jakarta-lucene/src/java/org/apache/lucene/index/FieldInfos.java
  
  Index: FieldInfos.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/FieldInfos.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- FieldInfos.java	30 Apr 2002 15:08:10 -0000	1.3
  +++ FieldInfos.java	21 Oct 2003 17:59:16 -0000	1.4
  @@ -57,6 +57,8 @@
   import java.util.Hashtable;
   import java.util.Vector;
   import java.util.Enumeration;
  +import java.util.Collection;
  +import java.util.Iterator;
   import java.io.IOException;
   
   import org.apache.lucene.document.Document;
  @@ -92,11 +94,10 @@
       }
     }
   
  -  /** Merges in information from another FieldInfos. */
  -  final void add(FieldInfos other) {
  -    for (int i = 0; i < other.size(); i++) {
  -      FieldInfo fi = other.fieldInfo(i);
  -      add(fi.name, fi.isIndexed);
  +  final void add(Collection names, boolean isIndexed) {
  +    Iterator i = names.iterator();
  +    while (i.hasNext()) {
  +      add((String)i.next(), isIndexed);
       }
     }
   
  
  
  
  1.21      +13 -4     jakarta-lucene/src/java/org/apache/lucene/index/IndexReader.java
  
  Index: IndexReader.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/IndexReader.java,v
  retrieving revision 1.20
  retrieving revision 1.21
  diff -u -r1.20 -r1.21
  --- IndexReader.java	17 Oct 2003 10:49:42 -0000	1.20
  +++ IndexReader.java	21 Oct 2003 17:59:16 -0000	1.21
  @@ -86,7 +86,7 @@
       segmentInfosAge = Long.MAX_VALUE;
     }
   
  -  Directory directory;
  +  private Directory directory;
     private Lock writeLock;
   
     //used to determine whether index has chaged since reader was opened
  @@ -131,6 +131,9 @@
       }
     }
   
  +  /** Returns the directory this index resides in. */
  +  public Directory directory() { return directory; }
  +
     /** Returns the time the index in the named directory was last modified. */
     public static long lastModified(String directory) throws IOException {
       return lastModified(new File(directory));
  @@ -194,6 +197,9 @@
     /** Returns true if document <i>n</i> has been deleted */
     public abstract boolean isDeleted(int n);
   
  +  /** Returns true if any documents have been deleted */
  +  public abstract boolean hasDeletions();
  +
     /** Returns the byte-encoded normalization factor for the named field of
      * every document.  This is used by the search code to score documents.
      *
  @@ -286,7 +292,10 @@
       doDelete(docNum);
     }
   
  -  abstract void doDelete(int docNum) throws IOException;
  +  /** Implements deletion of the document numbered <code>docNum</code>.
  +   * Applications should call {@link #delete(int)} or {@link #delete(Term)}.
  +   */
  +  protected abstract void doDelete(int docNum) throws IOException;
   
     /** Deletes all documents containing <code>term</code>.
       This is useful if one uses a document field to hold a unique ID string for
  @@ -323,7 +332,7 @@
     }
   
     /** Implements close. */
  -  abstract void doClose() throws IOException;
  +  protected abstract void doClose() throws IOException;
   
     /** Release the write lock, if needed. */
     protected final void finalize() throws IOException {
  
  
  
  1.19      +39 -8     jakarta-lucene/src/java/org/apache/lucene/index/IndexWriter.java
  
  Index: IndexWriter.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/IndexWriter.java,v
  retrieving revision 1.18
  retrieving revision 1.19
  diff -u -r1.18 -r1.19
  --- IndexWriter.java	17 Oct 2003 10:49:42 -0000	1.18
  +++ IndexWriter.java	21 Oct 2003 17:59:16 -0000	1.19
  @@ -324,6 +324,37 @@
       optimize();					  // final cleanup
     }
   
  +  /** Merges the provided indexes into this index.
  +   * <p>After this completes, the index is optimized. */
  +  public synchronized void addIndexes(IndexReader[] readers)
  +    throws IOException {
  +
  +    optimize();					  // start with zero or 1 seg
  +
  +    String mergedName = newSegmentName();
  +    SegmentMerger merger = new SegmentMerger(directory, mergedName, false);
  +
  +    if (segmentInfos.size() == 1)                 // add existing index, if any
  +      merger.add(new SegmentReader(segmentInfos.info(0)));
  +
  +    for (int i = 0; i < readers.length; i++)      // add new indexes
  +      merger.add(readers[i]);
  +
  +    int docCount = merger.merge();                // merge 'em
  +
  +    segmentInfos.setSize(0);                      // pop old infos & add new
  +    segmentInfos.addElement(new SegmentInfo(mergedName, docCount, directory));
  +
  +    synchronized (directory) {			  // in- & inter-process sync
  +      new Lock.With(directory.makeLock("commit.lock")) {
  +	  public Object doBody() throws IOException {
  +	    segmentInfos.write(directory);	  // commit changes
  +	    return null;
  +	  }
  +	}.run();
  +    }
  +  }
  +
     /** Merges all RAM-resident segments. */
     private final void flushRamSegments() throws IOException {
       int minSegment = segmentInfos.size()-1;
  @@ -379,12 +410,12 @@
       for (int i = minSegment; i < segmentInfos.size(); i++) {
         SegmentInfo si = segmentInfos.info(i);
         if (infoStream != null)
  -        infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
  -      SegmentReader reader = new SegmentReader(si);
  +	infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
  +      IndexReader reader = new SegmentReader(si);
         merger.add(reader);
  -      if ((reader.directory == this.directory) || // if we own the directory
  -          (reader.directory == this.ramDirectory))
  -        segmentsToDelete.addElement(reader);	  // queue segment for deletion
  +      if ((reader.directory()==this.directory) || // if we own the directory
  +          (reader.directory()==this.ramDirectory))
  +	segmentsToDelete.addElement(reader);	  // queue segment for deletion
         mergedDocCount += reader.numDocs();
       }
       if (infoStream != null) {
  @@ -420,10 +451,10 @@
   
       for (int i = 0; i < segments.size(); i++) {
         SegmentReader reader = (SegmentReader)segments.elementAt(i);
  -      if (reader.directory == this.directory)
  -        deleteFiles(reader.files(), deletable);	  // try to delete our files
  +      if (reader.directory() == this.directory)
  +	deleteFiles(reader.files(), deletable);	  // try to delete our files
         else
  -        deleteFiles(reader.files(), reader.directory); // delete, eg, RAM files
  +	deleteFiles(reader.files(), reader.directory()); // delete other files
       }
   
       writeDeleteableFiles(deletable);		  // note files we can't delete
  
  
  
  1.3       +6 -0      jakarta-lucene/src/java/org/apache/lucene/index/MultipleTermPositions.java
  
  Index: MultipleTermPositions.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/MultipleTermPositions.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- MultipleTermPositions.java	7 Nov 2002 05:55:39 -0000	1.2
  +++ MultipleTermPositions.java	21 Oct 2003 17:59:16 -0000	1.3
  @@ -297,6 +297,11 @@
   	throw new UnsupportedOperationException();
       }
   
  +    public void seek(TermEnum termEnum) throws IOException {
  +      throw new UnsupportedOperationException();
  +    }
  +
  +
       /**
        * Describe <code>read</code> method here.
        *
  @@ -311,4 +316,5 @@
       {
   	throw new UnsupportedOperationException();
       }
  +
   }
  
  
  
  1.2       +11 -12    jakarta-lucene/src/java/org/apache/lucene/index/SegmentMergeInfo.java
  
  Index: SegmentMergeInfo.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentMergeInfo.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- SegmentMergeInfo.java	18 Sep 2001 16:29:53 -0000	1.1
  +++ SegmentMergeInfo.java	21 Oct 2003 17:59:16 -0000	1.2
  @@ -60,30 +60,29 @@
   final class SegmentMergeInfo {
     Term term;
     int base;
  -  SegmentTermEnum termEnum;
  -  SegmentReader reader;
  -  SegmentTermPositions postings;
  +  TermEnum termEnum;
  +  IndexReader reader;
  +  TermPositions postings;
     int[] docMap = null;				  // maps around deleted docs
   
  -  SegmentMergeInfo(int b, SegmentTermEnum te, SegmentReader r)
  +  SegmentMergeInfo(int b, TermEnum te, IndexReader r)
       throws IOException {
       base = b;
       reader = r;
       termEnum = te;
       term = te.term();
  -    postings = new SegmentTermPositions(r);
  +    postings = reader.termPositions();
   
  -    if (reader.deletedDocs != null) {
  -      // build array which maps document numbers around deletions 
  -      BitVector deletedDocs = reader.deletedDocs;
  +    // build array which maps document numbers around deletions 
  +    if (reader.hasDeletions()) {
         int maxDoc = reader.maxDoc();
         docMap = new int[maxDoc];
         int j = 0;
         for (int i = 0; i < maxDoc; i++) {
  -	if (deletedDocs.get(i))
  -	  docMap[i] = -1;
  -	else
  -	  docMap[i] = j++;
  +        if (reader.isDeleted(i))
  +          docMap[i] = -1;
  +        else
  +          docMap[i] = j++;
         }
       }
     }
  
  
  
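  The docMap construction above can be read as the following pattern against
  the public IndexReader API; buildDocMap is a hypothetical helper name used
  only for this sketch.

   /** Builds an array mapping document numbers around deletions,
    *  or returns null when the reader has no deletions. */
   static int[] buildDocMap(IndexReader reader) {
     if (!reader.hasDeletions())
       return null;                                  // no remapping needed
     int maxDoc = reader.maxDoc();
     int[] docMap = new int[maxDoc];
     int j = 0;
     for (int i = 0; i < maxDoc; i++)
       docMap[i] = reader.isDeleted(i) ? -1 : j++;   // deleted docs map to -1
     return docMap;
   }
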
  1.5       +51 -52    jakarta-lucene/src/java/org/apache/lucene/index/SegmentMerger.java
  
  Index: SegmentMerger.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentMerger.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- SegmentMerger.java	13 Oct 2003 14:27:28 -0000	1.4
  +++ SegmentMerger.java	21 Oct 2003 17:59:16 -0000	1.5
  @@ -83,29 +83,32 @@
       useCompoundFile = compoundFile;
     }
   
  -  final void add(SegmentReader reader) {
  +  final void add(IndexReader reader) {
       readers.addElement(reader);
     }
   
  -  final SegmentReader segmentReader(int i) {
  -    return (SegmentReader)readers.elementAt(i);
  +  final IndexReader segmentReader(int i) {
  +    return (IndexReader)readers.elementAt(i);
     }
   
  -  final void merge() throws IOException {
  +  final int merge() throws IOException {
  +    int value;
       try {
         mergeFields();
         mergeTerms();
  -      mergeNorms();
  +      value = mergeNorms();
         
       } finally {
         for (int i = 0; i < readers.size(); i++) {  // close readers
  -        SegmentReader reader = (SegmentReader)readers.elementAt(i);
  -        reader.close();
  +	IndexReader reader = (IndexReader)readers.elementAt(i);
  +	reader.close();
         }
       }
       
       if (useCompoundFile)
           createCompoundFile();
  +
  +    return value;
     }
   
     private final void createCompoundFile() 
  @@ -149,8 +152,9 @@
     private final void mergeFields() throws IOException {
       fieldInfos = new FieldInfos();		  // merge field names
       for (int i = 0; i < readers.size(); i++) {
  -      SegmentReader reader = (SegmentReader)readers.elementAt(i);
  -      fieldInfos.add(reader.fieldInfos);
  +      IndexReader reader = (IndexReader)readers.elementAt(i);
  +      fieldInfos.add(reader.getFieldNames(true), true);
  +      fieldInfos.add(reader.getFieldNames(false), false);
       }
       fieldInfos.write(directory, segment + ".fnm");
       
  @@ -158,12 +162,11 @@
         new FieldsWriter(directory, segment, fieldInfos);
       try {
         for (int i = 0; i < readers.size(); i++) {
  -        SegmentReader reader = (SegmentReader)readers.elementAt(i);
  -        BitVector deletedDocs = reader.deletedDocs;
  -        int maxDoc = reader.maxDoc();
  -        for (int j = 0; j < maxDoc; j++)
  -          if (deletedDocs == null || !deletedDocs.get(j)) // skip deleted docs
  -            fieldsWriter.addDocument(reader.document(j));
  +	IndexReader reader = (IndexReader)readers.elementAt(i);
  +	int maxDoc = reader.maxDoc();
  +	for (int j = 0; j < maxDoc; j++)
  +	  if (!reader.isDeleted(j))               // skip deleted docs
  +	    fieldsWriter.addDocument(reader.document(j));
         }
       } finally {
         fieldsWriter.close();
  @@ -196,8 +199,8 @@
       queue = new SegmentMergeQueue(readers.size());
       int base = 0;
       for (int i = 0; i < readers.size(); i++) {
  -      SegmentReader reader = (SegmentReader)readers.elementAt(i);
  -      SegmentTermEnum termEnum = (SegmentTermEnum)reader.terms();
  +      IndexReader reader = (IndexReader)readers.elementAt(i);
  +      TermEnum termEnum = reader.terms();
         SegmentMergeInfo smi = new SegmentMergeInfo(base, termEnum, reader);
         base += reader.numDocs();
         if (smi.next())
  @@ -246,42 +249,40 @@
         termInfosWriter.add(smis[0].term, termInfo);
       }
     }
  -       
  +
     private final int appendPostings(SegmentMergeInfo[] smis, int n)
          throws IOException {
       int lastDoc = 0;
       int df = 0;					  // number of docs w/ term
       for (int i = 0; i < n; i++) {
         SegmentMergeInfo smi = smis[i];
  -      SegmentTermPositions postings = smi.postings;
  +      TermPositions postings = smi.postings;
         int base = smi.base;
         int[] docMap = smi.docMap;
  -      smi.termEnum.termInfo(termInfo);
  -      postings.seek(termInfo);
  +      postings.seek(smi.termEnum);
         while (postings.next()) {
  -        int doc;
  -        if (docMap == null)
  -          doc = base + postings.doc;		  // no deletions
  -        else
  -          doc = base + docMap[postings.doc];	  // re-map around deletions
  +        int doc = postings.doc();
  +        if (docMap != null)
  +          doc = docMap[doc];                      // map around deletions
  +        doc += base;                              // convert to merged space
   
           if (doc < lastDoc)
             throw new IllegalStateException("docs out of order");
   
           int docCode = (doc - lastDoc) << 1;	  // use low bit to flag freq=1
           lastDoc = doc;
  -
  -        int freq = postings.freq;
  +        
  +        int freq = postings.freq();
           if (freq == 1) {
             freqOutput.writeVInt(docCode | 1);	  // write doc & freq=1
           } else {
             freqOutput.writeVInt(docCode);	  // write doc
             freqOutput.writeVInt(freq);		  // write frequency in doc
           }
  -          
  +	  
           int lastPosition = 0;			  // write position deltas
  -        for (int j = 0; j < freq; j++) {
  -          int position = postings.nextPosition();
  +	for (int j = 0; j < freq; j++) {
  +	  int position = postings.nextPosition();
             proxOutput.writeVInt(position - lastPosition);
             lastPosition = position;
           }
  @@ -291,33 +292,31 @@
       }
       return df;
     }
  -
  -  private final void mergeNorms() throws IOException {
  +  private final int mergeNorms() throws IOException {
  +    int docCount = 0;
       for (int i = 0; i < fieldInfos.size(); i++) {
         FieldInfo fi = fieldInfos.fieldInfo(i);
         if (fi.isIndexed) {
  -        OutputStream output = directory.createFile(segment + ".f" + i);
  -        try {
  -          for (int j = 0; j < readers.size(); j++) {
  -            SegmentReader reader = (SegmentReader)readers.elementAt(j);
  -            BitVector deletedDocs = reader.deletedDocs;
  -            InputStream input = reader.normStream(fi.name);
  +	OutputStream output = directory.createFile(segment + ".f" + i);
  +	try {
  +	  for (int j = 0; j < readers.size(); j++) {
  +	    IndexReader reader = (IndexReader)readers.elementAt(j);
  +	    byte[] input = reader.norms(fi.name);
               int maxDoc = reader.maxDoc();
  -            try {
  -              for (int k = 0; k < maxDoc; k++) {
  -                byte norm = input != null ? input.readByte() : (byte)0;
  -                if (deletedDocs == null || !deletedDocs.get(k))
  -                  output.writeByte(norm);
  +            for (int k = 0; k < maxDoc; k++) {
  +              byte norm = input != null ? input[k] : (byte)0;
  +              if (!reader.isDeleted(k)) {
  +                output.writeByte(norm);
  +                docCount++;
                 }
  -            } finally {
  -              if (input != null)
  -                input.close();
  -            }
  -          }
  -        } finally {
  -          output.close();
  -        }
  +	    }
  +	  }
  +	} finally {
  +	  output.close();
  +	}
         }
       }
  +    return docCount;
     }
  +
   }
  
  
  
  1.14      +18 -15    jakarta-lucene/src/java/org/apache/lucene/index/SegmentReader.java
  
  Index: SegmentReader.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentReader.java,v
  retrieving revision 1.13
  retrieving revision 1.14
  diff -u -r1.13 -r1.14
  --- SegmentReader.java	17 Oct 2003 10:49:42 -0000	1.13
  +++ SegmentReader.java	21 Oct 2003 17:59:16 -0000	1.14
  @@ -110,9 +110,9 @@
       segment = si.name;
   
       // Use compound file directory for some files, if it exists
  -    Directory cfsDir = directory;
  -    if (directory.fileExists(segment + ".cfs")) {
  -      cfsReader = new CompoundFileReader(directory, segment + ".cfs");
  +    Directory cfsDir = directory();
  +    if (directory().fileExists(segment + ".cfs")) {
  +      cfsReader = new CompoundFileReader(directory(), segment + ".cfs");
         cfsDir = cfsReader;
       }
   
  @@ -124,7 +124,7 @@
   
       // NOTE: the bitvector is stored using the regular directory, not cfs
       if (hasDeletions(si))
  -      deletedDocs = new BitVector(directory, segment + ".del");
  +      deletedDocs = new BitVector(directory(), segment + ".del");
   
       // make sure that all index files have been read or are kept open
       // so that if an index update removes them we'll still have them
  @@ -133,16 +133,15 @@
       openNorms(cfsDir);
     }
   
  -
  -  final synchronized void doClose() throws IOException {
  +  protected final synchronized void doClose() throws IOException {
       if (deletedDocsDirty) {
  -      synchronized (directory) {		  // in- & inter-process sync
  -        new Lock.With(directory.makeLock(IndexWriter.COMMIT_LOCK_NAME),
  +      synchronized (directory()) {		  // in- & inter-process sync
  +        new Lock.With(directory().makeLock(IndexWriter.COMMIT_LOCK_NAME),
             IndexWriter.COMMIT_LOCK_TIMEOUT) {
             public Object doBody() throws IOException {
  -            deletedDocs.write(directory, segment + ".tmp");
  -            directory.renameFile(segment + ".tmp", segment + ".del");
  -            directory.touchFile("segments");
  +            deletedDocs.write(directory(), segment + ".tmp");
  +            directory().renameFile(segment + ".tmp", segment + ".del");
  +            directory().touchFile("segments");
               return null;
             }
           }.run();
  @@ -164,18 +163,22 @@
         cfsReader.close();
   
       if (closeDirectory)
  -      directory.close();
  +      directory().close();
     }
   
     static final boolean hasDeletions(SegmentInfo si) throws IOException {
       return si.dir.fileExists(si.name + ".del");
     }
   
  +  public boolean hasDeletions() {
  +    return deletedDocs != null;
  +  }
  +
     static final boolean usesCompoundFile(SegmentInfo si) throws IOException {
       return si.dir.fileExists(si.name + ".cfs");
     }
   
  -  final synchronized void doDelete(int docNum) throws IOException {
  +  protected final synchronized void doDelete(int docNum) throws IOException {
       if (deletedDocs == null)
         deletedDocs = new BitVector(maxDoc());
       deletedDocsDirty = true;
  @@ -190,7 +193,7 @@
   
       for (int i=0; i<ext.length; i++) {
         String name = segment + "." + ext[i];
  -      if (directory.fileExists(name))
  +      if (directory().fileExists(name))
           files.addElement(name);
       }
   
  
  
  
  1.3       +9 -0      jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermDocs.java
  
  Index: SegmentTermDocs.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermDocs.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- SegmentTermDocs.java	21 Jan 2002 17:07:23 -0000	1.2
  +++ SegmentTermDocs.java	21 Oct 2003 17:59:16 -0000	1.3
  @@ -78,6 +78,15 @@
       seek(ti);
     }
     
  +  public void seek(TermEnum enum) throws IOException {
  +    TermInfo ti;
  +    if (enum instanceof SegmentTermEnum)          // optimized case
  +      ti = ((SegmentTermEnum)enum).termInfo();
  +    else                                          // punt case
  +      ti = parent.tis.get(enum.term());
  +    seek(ti);
  +  }
  +  
     void seek(TermInfo ti) throws IOException {
       if (ti == null) {
         freqCount = 0;
  
  
  
  1.4       +2 -1      jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermPositions.java
  
  Index: SegmentTermPositions.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermPositions.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- SegmentTermPositions.java	8 Feb 2002 22:52:03 -0000	1.3
  +++ SegmentTermPositions.java	21 Oct 2003 17:59:16 -0000	1.4
  @@ -106,6 +106,7 @@
   
     public final int read(final int[] docs, final int[] freqs)
         throws IOException {
  -    throw new RuntimeException();
  +    throw new UnsupportedOperationException();
     }
  +
   }
  
  
  
  1.13      +15 -3     jakarta-lucene/src/java/org/apache/lucene/index/SegmentsReader.java
  
  Index: SegmentsReader.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentsReader.java,v
  retrieving revision 1.12
  retrieving revision 1.13
  diff -u -r1.12 -r1.13
  --- SegmentsReader.java	10 Sep 2003 14:27:37 -0000	1.12
  +++ SegmentsReader.java	21 Oct 2003 17:59:16 -0000	1.13
  @@ -76,6 +76,7 @@
     private Hashtable normsCache = new Hashtable();
     private int maxDoc = 0;
     private int numDocs = -1;
  +  private boolean hasDeletions = false;
   
     SegmentsReader(Directory directory, SegmentReader[] r) throws IOException {
       super(directory);
  @@ -84,6 +85,9 @@
       for (int i = 0; i < readers.length; i++) {
         starts[i] = maxDoc;
         maxDoc += readers[i].maxDoc();		  // compute maxDocs
  +
  +      if (readers[i].hasDeletions())
  +        hasDeletions = true;
       }
       starts[readers.length] = maxDoc;
     }
  @@ -112,10 +116,13 @@
       return readers[i].isDeleted(n - starts[i]);	  // dispatch to segment reader
     }
   
  -  final synchronized void doDelete(int n) throws IOException {
  +  public boolean hasDeletions() { return hasDeletions; }
  +
  +  protected final synchronized void doDelete(int n) throws IOException {
       numDocs = -1;				  // invalidate cache
       int i = readerIndex(n);			  // find segment num
       readers[i].doDelete(n - starts[i]);		  // dispatch to segment reader
  +    hasDeletions = true;
     }
   
     private final int readerIndex(int n) {	  // find reader for doc n:
  @@ -174,7 +181,7 @@
       return new SegmentsTermPositions(readers, starts);
     }
   
  -  final synchronized void doClose() throws IOException {
  +  protected final synchronized void doClose() throws IOException {
       for (int i = 0; i < readers.length; i++)
         readers[i].close();
     }
  @@ -309,6 +316,10 @@
       this.current = null;
     }
   
  +  public void seek(TermEnum termEnum) throws IOException {
  +    seek(termEnum.term());
  +  }
  +
     public final boolean next() throws IOException {
       if (current != null && current.next()) {
         return true;
  @@ -389,4 +400,5 @@
     public final int nextPosition() throws IOException {
       return ((SegmentTermPositions)current).nextPosition();
     }
  +
   }
  
  
  
  1.5       +5 -0      jakarta-lucene/src/java/org/apache/lucene/index/TermDocs.java
  
  Index: TermDocs.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/TermDocs.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- TermDocs.java	29 Jan 2003 17:18:54 -0000	1.4
  +++ TermDocs.java	21 Oct 2003 17:59:16 -0000	1.5
  @@ -71,6 +71,11 @@
      */
     void seek(Term term) throws IOException;
   
  +  /** Sets this to the data for the current term in a {@link TermEnum}.
  +   * This may be optimized in some implementations.
  +   */
  +  void seek(TermEnum termEnum) throws IOException;
  +
     /** Returns the current document number.  <p> This is invalid until {@link
         #next()} is called for the first time.*/
     int doc();
  
  
  
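  As a rough usage sketch of the new seek(TermEnum) entry point (the
  already-open reader is an assumption for the example), the loop below walks
  every term and its postings without re-resolving each Term by name, which is
  the shortcut the merge code takes via SegmentTermEnum.termInfo():

   TermEnum terms = reader.terms();
   TermDocs termDocs = reader.termDocs();
   try {
     while (terms.next()) {
       termDocs.seek(terms);               // position on the enum's current term
       while (termDocs.next()) {
         int doc = termDocs.doc();         // document number
         int freq = termDocs.freq();       // frequency of the term in that doc
         // ... process (doc, freq) for terms.term() ...
       }
     }
   } finally {
     termDocs.close();
     terms.close();
   }
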
  1.1                  jakarta-lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
  
  Index: FilterIndexReader.java
  ===================================================================
  package org.apache.lucene.index;
  
  /* ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2003 The Apache Software Foundation. All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache" and "Apache Software Foundation" and
   *    "Apache Lucene" must not be used to endorse or promote products
   *    derived from this software without prior written permission. For
   *    written permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    "Apache Lucene", nor may "Apache" appear in their name, without
   *    prior written permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  import java.io.IOException;
  import java.util.Collection;
  
  import org.apache.lucene.document.Document;
  
  /**  A <code>FilterIndexReader</code> contains another IndexReader, which it
   * uses as its basic source of data, possibly transforming the data along the
   * way or providing additional functionality. The class
   * <code>FilterIndexReader</code> itself simply implements all abstract methods
   * of <code>IndexReader</code> with versions that pass all requests to the
   * contained index reader. Subclasses of <code>FilterIndexReader</code> may
   * further override some of these methods and may also provide additional
   * methods and fields.
  */
  public class FilterIndexReader extends IndexReader {
  
    /** Base class for filtering {@link TermDocs} implementations. */
    public static class FilterTermDocs implements TermDocs {
      protected TermDocs in;
  
      public FilterTermDocs(TermDocs in) { this.in = in; }
  
      public void seek(Term term) throws IOException { in.seek(term); }
      public void seek(TermEnum enum) throws IOException { in.seek(enum); }
      public int doc() { return in.doc(); }
      public int freq() { return in.freq(); }
      public boolean next() throws IOException { return in.next(); }
      public int read(int[] docs, int[] freqs) throws IOException {
        return in.read(docs, freqs);
      }
      public boolean skipTo(int i) throws IOException { return in.skipTo(i); }
      public void close() throws IOException { in.close(); } 
    }
  
    /** Base class for filtering {@link TermPositions} implementations. */
    public static class FilterTermPositions
       extends FilterTermDocs implements TermPositions {
  
      public FilterTermPositions(TermPositions in) { super(in); }
  
      public int nextPosition() throws IOException {
        return ((TermPositions)in).nextPosition();
      }
    }
  
    /** Base class for filtering {@link TermEnum} implementations. */
    public static class FilterTermEnum extends TermEnum {
      protected TermEnum in;
  
      public FilterTermEnum(TermEnum in) { this.in = in; }
  
      public boolean next() throws IOException { return in.next(); }
      public Term term() { return in.term(); }
      public int docFreq() { return in.docFreq(); }
      public void close() throws IOException { in.close(); }
    }
  
    protected IndexReader in;
  
    public FilterIndexReader(IndexReader in) {
      super(in.directory());
      this.in = in;
    }
  
    public int numDocs() { return in.numDocs(); }
    public int maxDoc() { return in.maxDoc(); }
  
    public Document document(int n) throws IOException {return in.document(n);}
  
    public boolean isDeleted(int n) { return in.isDeleted(n); }
    public boolean hasDeletions() { return in.hasDeletions(); }
  
    public byte[] norms(String f) throws IOException { return in.norms(f); }
  
    public TermEnum terms() throws IOException { return in.terms(); }
    public TermEnum terms(Term t) throws IOException { return in.terms(t); }
  
    public int docFreq(Term t) throws IOException { return in.docFreq(t); }
  
    public TermDocs termDocs() throws IOException { return in.termDocs(); }
    public TermPositions termPositions() throws IOException {
      return in.termPositions();
    }
  
    protected void doDelete(int n) throws IOException { in.doDelete(n); }
    protected void doClose() throws IOException { in.doClose(); }
  
    public Collection getFieldNames() throws IOException {
      return in.getFieldNames();
    }
    public Collection getFieldNames(boolean indexed) throws IOException {
      return in.getFieldNames(indexed);
    }
  }
  
  
  
  1.1                  jakarta-lucene/src/test/org/apache/lucene/index/TestFilterIndexReader.java
  
  Index: TestFilterIndexReader.java
  ===================================================================
  package org.apache.lucene.index;
  
  /* ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2001, 2002, 2003 The Apache Software Foundation.
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache" and "Apache Software Foundation" and
   *    "Apache Lucene" must not be used to endorse or promote products
   *    derived from this software without prior written permission. For
   *    written permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    "Apache Lucene", nor may "Apache" appear in their name, without
   *    prior written permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  
  import junit.framework.TestCase;
  import junit.framework.TestSuite;
  import junit.textui.TestRunner;
  import junit.framework.TestResult;
  
  import org.apache.lucene.search.IndexSearcher;
  import org.apache.lucene.search.Searcher;
  import org.apache.lucene.search.Hits;
  import org.apache.lucene.search.TermQuery;
  import org.apache.lucene.store.Directory;
  import org.apache.lucene.store.RAMDirectory;
  import org.apache.lucene.store.FSDirectory;
  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.analysis.WhitespaceAnalyzer;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.Field;
  
  import java.util.Collection;
  import java.io.IOException;
  
  public class TestFilterIndexReader extends TestCase {
  
    private static class TestReader extends FilterIndexReader {
  
      /** Filter that only permits terms containing 'e'.*/
      private static class TestTermEnum extends FilterTermEnum {
        public TestTermEnum(TermEnum enum)
          throws IOException {
          super(enum);
        }
  
        /** Scan for terms containing the letter 'e'.*/
        public boolean next() throws IOException {
          while (in.next()) {
            if (in.term().text().indexOf('e') != -1)
              return true;
          }
          return false;
        }
      }
      
      /** Filter that only returns odd numbered documents. */
      private static class TestTermPositions extends FilterTermPositions {
        public TestTermPositions(TermPositions in)
          throws IOException {
          super(in);
        }
  
        /** Scan for odd numbered documents. */
        public boolean next() throws IOException {
          while (in.next()) {
            if ((in.doc() % 2) == 1)
              return true;
          }
          return false;
        }
      }
      
      public TestReader(IndexReader reader) {
        super(reader);
      }
  
      /** Filter terms with TestTermEnum. */
      public TermEnum terms() throws IOException {
        return new TestTermEnum(in.terms());
      }
  
      /** Filter positions with TestTermPositions. */
      public TermPositions termPositions() throws IOException {
        return new TestTermPositions(in.termPositions());
      }
    }
  
  
    /** Main for running test case by itself. */
    public static void main(String args[]) {
      TestRunner.run (new TestSuite(TestIndexReader.class));
    }
      
    /**
     * Tests the IndexReader.getFieldNames implementation
     * @throws Exception on error
     */
    public void testFilterIndexReader() throws Exception {
      RAMDirectory directory = new RAMDirectory();
      IndexWriter writer =
        new IndexWriter(directory, new WhitespaceAnalyzer(), true);
  
      Document d1 = new Document();
      d1.add(Field.Text("default","one two"));
      writer.addDocument(d1);
  
      Document d2 = new Document();
      d2.add(Field.Text("default","one three"));
      writer.addDocument(d2);
  
      Document d3 = new Document();
      d3.add(Field.Text("default","two four"));
      writer.addDocument(d3);
  
      writer.close();
  
      IndexReader reader = new TestReader(IndexReader.open(directory));
  
      TermEnum terms = reader.terms();
      while (terms.next()) {
        assertTrue(terms.term().text().indexOf('e') != -1);
      }
      terms.close();
      
      TermPositions positions = reader.termPositions(new Term("default", "one"));
      while (positions.next()) {
        assertTrue((positions.doc() % 2) == 1);
      }
  
      reader.close();
    }
  }
  
  
  


