lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Otis Gospodnetic <otis_gospodne...@yahoo.com>
Subject Re: getAllFieldNames diffs
Date Tue, 12 Nov 2002 20:27:28 GMT
Nice :)
I looked at the code first, and was about to ask - why not just return
String[]?  What is the advantage of Collection in this case?

Thanks,
Otis

--- Peter Mularien <pmularien@deploy.com> wrote:
> Attached. Please comment or critique. I've used the
> java.util.Collection 
> classes here extensively, which may be an issue since other parts of
> the 
> API return Enumeration. Collections have been preferred since JDK 1.2
> 
> for a number of reasons I won't go into here. Please let me know if 
> these are OK to use in Lucene.
> 
> I have added an [ambitiously named (: ] TestIndexReader to the unit 
> tests, it contains a couple simple unit tests for this functionality.
> 
> Diffs are also attached.
> 
> Thanks
> Peter
> > ? src/test/org/apache/lucene/index/TestIndexReader.java
> Index: src/java/org/apache/lucene/index/IndexReader.java
> ===================================================================
> RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/index/IndexReader.java,v
> retrieving revision 1.11
> diff -u -c -r1.11 IndexReader.java
> *** src/java/org/apache/lucene/index/IndexReader.java	7 Nov 2002 05:55:39 -0000	1.11
> --- src/java/org/apache/lucene/index/IndexReader.java	12 Nov 2002 20:11:20 -0000
> ***************
> *** 56,61 ****
> --- 56,63 ----
>   
>   import java.io.IOException;
>   import java.io.File;
> + import java.util.Collection;
> + 
>   import org.apache.lucene.store.Directory;
>   import org.apache.lucene.store.FSDirectory;
>   import org.apache.lucene.store.Lock;
> ***************
> *** 301,306 ****
> --- 303,316 ----
>         writeLock = null;
>       }
>     }
> + 
> +     /**
> +      * Return a list of all unique field names which exist in the index pointed to by
> +      * this IndexReader.
> +      * @return Collection of Strings indicating the names of the fields
> +      * @throws IOException if there is a problem with accessing the index
> +      */
> +     public abstract Collection getFieldNames() throws IOException;
>   
>     /**
>      * Returns <code>true</code> iff the index in the named directory is
> Index: src/java/org/apache/lucene/index/SegmentReader.java
> ===================================================================
> RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/index/SegmentReader.java,v
> retrieving revision 1.6
> diff -u -c -r1.6 SegmentReader.java
> *** src/java/org/apache/lucene/index/SegmentReader.java	7 Nov 2002 05:55:39 -0000	1.6
> --- src/java/org/apache/lucene/index/SegmentReader.java	12 Nov 2002 20:11:20 -0000
> ***************
> *** 55,69 ****
>    */
>   
>   import java.io.IOException;
> ! import java.util.Hashtable;
>   import java.util.Enumeration;
>   import java.util.Vector;
>   
> - import org.apache.lucene.util.BitVector;
> - import org.apache.lucene.store.Directory;
> - import org.apache.lucene.store.Lock;
> - import org.apache.lucene.store.InputStream;
>   import org.apache.lucene.document.Document;
>   
>   final class SegmentReader extends IndexReader {
>     private boolean closeDirectory = false;
> --- 55,71 ----
>    */
>   
>   import java.io.IOException;
> ! import java.util.Collection;
>   import java.util.Enumeration;
> + import java.util.HashSet;
> + import java.util.Hashtable;
> + import java.util.Set;
>   import java.util.Vector;
>   
>   import org.apache.lucene.document.Document;
> + import org.apache.lucene.store.InputStream;
> + import org.apache.lucene.store.Lock;
> + import org.apache.lucene.util.BitVector;
>   
>   final class SegmentReader extends IndexReader {
>     private boolean closeDirectory = false;
> ***************
> *** 73,79 ****
>     private FieldsReader fieldsReader;
>   
>     TermInfosReader tis;
> !   
>     BitVector deletedDocs = null;
>     private boolean deletedDocsDirty = false;
>   
> --- 75,81 ----
>     private FieldsReader fieldsReader;
>   
>     TermInfosReader tis;
> ! 
>     BitVector deletedDocs = null;
>     private boolean deletedDocsDirty = false;
>   
> ***************
> *** 113,119 ****
>       proxStream = directory.openFile(segment + ".prx");
>       openNorms();
>     }
> !   
>     final synchronized void doClose() throws IOException {
>       if (deletedDocsDirty) {
>         synchronized (directory) {		  // in- & inter-process sync
> --- 115,121 ----
>       proxStream = directory.openFile(segment + ".prx");
>       openNorms();
>     }
> ! 
>     final synchronized void doClose() throws IOException {
>       if (deletedDocsDirty) {
>         synchronized (directory) {		  // in- & inter-process sync
> ***************
> *** 256,262 ****
>     private final void openNorms() throws IOException {
>       for (int i = 0; i < fieldInfos.size(); i++) {
>         FieldInfo fi = fieldInfos.fieldInfo(i);
> !       if (fi.isIndexed) 
>   	norms.put(fi.name,
>   		  new Norm(directory.openFile(segment + ".f" + fi.number)));
>       }
> --- 258,264 ----
>     private final void openNorms() throws IOException {
>       for (int i = 0; i < fieldInfos.size(); i++) {
>         FieldInfo fi = fieldInfos.fieldInfo(i);
> !       if (fi.isIndexed)
>   	norms.put(fi.name,
>   		  new Norm(directory.openFile(segment + ".f" + fi.number)));
>       }
> ***************
> *** 271,274 ****
> --- 273,287 ----
>         }
>       }
>     }
> + 
> +     // javadoc inherited
> +     public Collection getFieldNames() throws IOException {
> +         // maintain a unique set of field names
> +         Set fieldSet = new HashSet();
> +         for (int i = 0; i < fieldInfos.size(); i++) {
> +             FieldInfo fi = fieldInfos.fieldInfo(i);
> +             fieldSet.add(fi.name);
> +         }
> +         return fieldSet;
> +     }
>   }
> Index: src/java/org/apache/lucene/index/SegmentsReader.java
> ===================================================================
> RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/index/SegmentsReader.java,v
> retrieving revision 1.9
> diff -u -c -r1.9 SegmentsReader.java
> *** src/java/org/apache/lucene/index/SegmentsReader.java	7 Nov 2002 05:55:39 -0000	1.9
> --- src/java/org/apache/lucene/index/SegmentsReader.java	12 Nov 2002 20:11:20 -0000
> ***************
> *** 55,64 ****
>    */
>   
>   import java.io.IOException;
>   import java.util.Hashtable;
>   
> - import org.apache.lucene.store.Directory;
>   import org.apache.lucene.document.Document;
>   
>   /**
>    * FIXME: Describe class <code>SegmentsReader</code> here.
> --- 55,68 ----
>    */
>   
>   import java.io.IOException;
> + import java.util.Collection;
> + import java.util.HashSet;
>   import java.util.Hashtable;
> + import java.util.Iterator;
> + import java.util.Set;
>   
>   import org.apache.lucene.document.Document;
> + import org.apache.lucene.store.Directory;
>   
>   /**
>    * FIXME: Describe class <code>SegmentsReader</code> here.
> ***************
> *** 174,179 ****
> --- 178,199 ----
>       for (int i = 0; i < readers.length; i++)
>         readers[i].close();
>     }
> + 
> +     // javadoc inherited
> +     public Collection getFieldNames() throws IOException {
> +         // maintain a unique set of field names
> +         Set fieldSet = new HashSet();
> +         for (int i = 0; i < readers.length; i++) {
> +             SegmentReader reader = readers[i];
> +             Collection names = reader.getFieldNames();
> +             // iterate through the field names and add them to the set
> +             for (Iterator iterator = names.iterator(); iterator.hasNext();) {
> +                 String s = (String) iterator.next();
> +                 fieldSet.add(s);
> +             }
> +         }
> +         return fieldSet;
> +     }
>   }
>   
>   class SegmentsTermEnum extends TermEnum {
> > package org.apache.lucene.index;
> 
> import junit.framework.TestCase;
> import org.apache.lucene.store.RAMDirectory;
> import org.apache.lucene.analysis.standard.StandardAnalyzer;
> import org.apache.lucene.document.Document;
> import org.apache.lucene.document.Field;
> 
> import java.util.Collection;
> import java.io.IOException;
> 
> /*
> ====================================================================
>  * The Apache Software License, Version 1.1
>  *
>  * Copyright (c) 2001 The Apache Software Foundation.  All rights
>  * reserved.
>  *
>  * Redistribution and use in source and binary forms, with or without
>  * modification, are permitted provided that the following conditions
>  * are met:
>  *
>  * 1. Redistributions of source code must retain the above copyright
>  *    notice, this list of conditions and the following disclaimer.
>  *
>  * 2. Redistributions in binary form must reproduce the above
> copyright
>  *    notice, this list of conditions and the following disclaimer in
>  *    the documentation and/or other materials provided with the
>  *    distribution.
>  *
>  * 3. The end-user documentation included with the redistribution,
>  *    if any, must include the following acknowledgment:
>  *       "This product includes software developed by the
>  *        Apache Software Foundation (http://www.apache.org/)."
>  *    Alternately, this acknowledgment may appear in the software
> itself,
>  *    if and wherever such third-party acknowledgments normally
> appear.
>  *
>  * 4. The names "Apache" and "Apache Software Foundation" and
>  *    "Apache Lucene" must not be used to endorse or promote products
>  *    derived from this software without prior written permission.
> For
>  *    written permission, please contact apache@apache.org.
>  *
>  * 5. Products derived from this software may not be called "Apache",
>  *    "Apache Lucene", nor may "Apache" appear in their name, without
>  *    prior written permission of the Apache Software Foundation.
>  *
>  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
>  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
>  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
>  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
>  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
>  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> AND
>  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
>  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
>  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
>  * SUCH DAMAGE.
>  *
> ====================================================================
>  *
>  * This software consists of voluntary contributions made by many
>  * individuals on behalf of the Apache Software Foundation.  For more
>  * information on the Apache Software Foundation, please see
>  * <http://www.apache.org/>.
>  */
> 
> public class TestIndexReader extends TestCase {
>     /**
>      * Test the IndexReader.getFieldNames implementation
>      * @throws Exception on error
>      */
>     public void testGetFieldNames() throws Exception {
>         RAMDirectory d = new RAMDirectory();
>         // set up writer
>         IndexWriter writer = new IndexWriter(d, new StandardAnalyzer(), true);
>         addDocumentWithFields(writer);
>         writer.close();
>         // set up reader
>         IndexReader reader = IndexReader.open(d);
>         Collection fieldNames = reader.getFieldNames();
>         assertTrue(fieldNames.contains("keyword"));
>         assertTrue(fieldNames.contains("text"));
>         assertTrue(fieldNames.contains("unindexed"));
>         assertTrue(fieldNames.contains("unstored"));
>         // add more documents
>         writer = new IndexWriter(d, new StandardAnalyzer(), false);
>         // want to get some more segments here
>         for(int i=0;i<5*writer.mergeFactor;i++) {
>             addDocumentWithFields(writer);
>         }
>         // new fields are in some different segments (we hope)
>         for(int i=0;i<5*writer.mergeFactor;i++) {
>             addDocumentWithDifferentFields(writer);
>         }
>         writer.close();
>         // verify fields again
>         reader = IndexReader.open(d);
>         fieldNames = reader.getFieldNames();
>         assertTrue(fieldNames.contains("keyword"));
>         assertTrue(fieldNames.contains("text"));
>         assertTrue(fieldNames.contains("unindexed"));
>         assertTrue(fieldNames.contains("unstored"));
>         assertTrue(fieldNames.contains("keyword2"));
>         assertTrue(fieldNames.contains("text2"));
>         assertTrue(fieldNames.contains("unindexed2"));
>         assertTrue(fieldNames.contains("unstored2"));
>     }
> 
>     private void addDocumentWithFields(IndexWriter writer) throws IOException {
>         Document doc = new Document();
>         doc.add(Field.Keyword("keyword","test1"));
>         doc.add(Field.Text("text","test1"));
>         doc.add(Field.UnIndexed("unindexed","test1"));
>         doc.add(Field.UnStored("unstored","test1"));
>         writer.addDocument(doc);
>     }
> 
>     private void addDocumentWithDifferentFields(IndexWriter writer) throws IOException {
>         Document doc = new Document();
>         doc.add(Field.Keyword("keyword2","test1"));
>         doc.add(Field.Text("text2","test1"));
>         doc.add(Field.UnIndexed("unindexed2","test1"));
>         doc.add(Field.UnStored("unstored2","test1"));
>         writer.addDocument(doc);
>     }
> }
> 
> > --
> To unsubscribe, e-mail:   <mailto:lucene-dev-unsubscribe@jakarta.apache.org>
> For additional commands, e-mail: <mailto:lucene-dev-help@jakarta.apache.org>


__________________________________________________
Do you Yahoo!?
U2 on LAUNCH - Exclusive greatest hits videos
http://launch.yahoo.com/u2

--
To unsubscribe, e-mail:   <mailto:lucene-dev-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:lucene-dev-help@jakarta.apache.org>


Mime
View raw message