lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Erik Hatcher <e...@ehatchersolutions.com>
Subject Fwd: cvs commit: jakarta-lucene/src/java/org/apache/lucene/analysis/de WordlistLoader.java
Date Wed, 10 Mar 2004 00:24:29 GMT
My apologies for the errant commit of WordlistLoader... it was a local
change (formatting clean-up) and I did not intend to commit it.

But here is the change to StopFilter that has been discussed.  I went  
ahead and converted it to a Set.  Do we really need makeStopSet to be  
public?  If you have a String[], my recommendation would be to just call
the constructor that takes it.  I'll happily make it protected.

	Erik

Begin forwarded message:

> From: ehatcher@apache.org
> Date: March 9, 2004 7:18:02 PM EST
> To: jakarta-lucene-cvs@apache.org
> Subject: cvs commit:  
> jakarta-lucene/src/java/org/apache/lucene/analysis/de  
> WordlistLoader.java
> Reply-To: "Lucene Developers List" <lucene-dev@jakarta.apache.org>
>
> ehatcher    2004/03/09 16:18:02
>
>   Modified:    src/java/org/apache/lucene/analysis StopFilter.java
>                src/java/org/apache/lucene/analysis/de  
> WordlistLoader.java
>   Log:
>   convert Hashtable to Set, to avoid unnecessary synchronization issues
>
>   Revision  Changes    Path
>   1.7       +51 -12     
> jakarta-lucene/src/java/org/apache/lucene/analysis/StopFilter.java
>
>   Index: StopFilter.java
>   ===================================================================
>   RCS file:  
> /home/cvs/jakarta-lucene/src/java/org/apache/lucene/analysis/ 
> StopFilter.java,v
>   retrieving revision 1.6
>   retrieving revision 1.7
>   diff -u -r1.6 -r1.7
>   --- StopFilter.java	5 Dec 2003 14:30:12 -0000	1.6
>   +++ StopFilter.java	10 Mar 2004 00:18:02 -0000	1.7
>   @@ -55,31 +55,55 @@
>     */
>
>    import java.io.IOException;
>   +import java.util.HashSet;
>    import java.util.Hashtable;
>   +import java.util.Set;
>
>   -/** Removes stop words from a token stream. */
>   +/**
>   + * Removes stop words from a token stream.
>   + */
>
>    public final class StopFilter extends TokenFilter {
>
>   -  private Hashtable table;
>   +  private Set table;
>
>   -  /** Constructs a filter which removes words from the input
>   -   TokenStream that are named in the array of words. */
>   +  /**
>   +   * Constructs a filter which removes words from the input
>   +   * TokenStream that are named in the array of words.
>   +   */
>      public StopFilter(TokenStream in, String[] stopWords) {
>        super(in);
>   -    table = makeStopTable(stopWords);
>   +    table = makeStopSet(stopWords);
>      }
>
>   -  /** Constructs a filter which removes words from the input
>   -   TokenStream that are named in the Hashtable. */
>   +  /**
>   +   * Constructs a filter which removes words from the input
>   +   * TokenStream that are named in the Hashtable.
>   +   *
>   +   * @deprecated Use {@link #StopFilter(TokenStream, Set)}  
> StopFilter(TokenStream,Map)} instead
>   +   */
>      public StopFilter(TokenStream in, Hashtable stopTable) {
>        super(in);
>   +    table = stopTable.keySet();
>   +  }
>   +
>   +  /**
>   +   * Constructs a filter which removes words from the input
>   +   * TokenStream that are named in the Set.
>   +   */
>   +  public StopFilter(TokenStream in, Set stopTable) {
>   +    super(in);
>        table = stopTable;
>      }
>
>   -  /** Builds a Hashtable from an array of stop words, appropriate  
> for passing
>   -   into the StopFilter constructor.  This permits this table  
> construction to
>   -   be cached once when an Analyzer is constructed. */
>   +  /**
>   +   * Builds a Hashtable from an array of stop words,
>   +   * appropriate for passing into the StopFilter constructor.
>   +   * This permits this table construction to be cached once when
>   +   * an Analyzer is constructed.
>   +   *
>   +   * @deprecated Use {@link #makeStopSet(String[] makeStopSet)  
> instead.
>   +   */
>      public static final Hashtable makeStopTable(String[] stopWords) {
>        Hashtable stopTable = new Hashtable(stopWords.length);
>        for (int i = 0; i < stopWords.length; i++)
>   @@ -87,11 +111,26 @@
>        return stopTable;
>      }
>
>   -  /** Returns the next input Token whose termText() is not a stop  
> word. */
>   +  /**
>   +   * Builds a Set from an array of stop words,
>   +   * appropriate for passing into the StopFilter constructor.
>   +   * This permits this table construction to be cached once when
>   +   * an Analyzer is constructed.
>   +   */
>   +  public static final Set makeStopSet(String[] stopWords) {
>   +    Set stopTable = new HashSet(stopWords.length);
>   +    for (int i = 0; i < stopWords.length; i++)
>   +      stopTable.add(stopWords[i]);
>   +    return stopTable;
>   +  }
>   +
>   +  /**
>   +   * Returns the next input Token whose termText() is not a stop  
> word.
>   +   */
>      public final Token next() throws IOException {
>        // return the first non-stop word found
>        for (Token token = input.next(); token != null; token =  
> input.next())
>   -      if (table.get(token.termText) == null)
>   +      if (!table.contains(token.termText))
>            return token;
>        // reached EOS -- return null
>        return null;
>
>
>
>   1.5       +60 -61     
> jakarta-lucene/src/java/org/apache/lucene/analysis/de/ 
> WordlistLoader.java
>
>   Index: WordlistLoader.java
>   ===================================================================
>   RCS file:  
> /home/cvs/jakarta-lucene/src/java/org/apache/lucene/analysis/de/ 
> WordlistLoader.java,v
>   retrieving revision 1.4
>   retrieving revision 1.5
>   diff -u -r1.4 -r1.5
>   --- WordlistLoader.java	18 Aug 2002 17:33:16 -0000	1.4
>   +++ WordlistLoader.java	10 Mar 2004 00:18:02 -0000	1.5
>   @@ -68,71 +68,70 @@
>     * @author    Gerhard Schwarz
>     * @version   $Id$
>     */
>   -public class WordlistLoader
>   -{
>   -    /**
>   -     * @param path      Path to the wordlist
>   -     * @param wordfile  Name of the wordlist
>   -     */
>   -    public static Hashtable getWordtable( String path, String  
> wordfile ) {
>   -	if ( path == null || wordfile == null ) {
>   -	    return new Hashtable();
>   -	}
>   -	return getWordtable( new File( path, wordfile ) );
>   +public class WordlistLoader {
>   +  /**
>   +   * @param path      Path to the wordlist
>   +   * @param wordfile  Name of the wordlist
>   +   */
>   +  public static Hashtable getWordtable(String path, String  
> wordfile) {
>   +    if (path == null || wordfile == null) {
>   +      return new Hashtable();
>        }
>   +    return getWordtable(new File(path, wordfile));
>   +  }
>
>   -    /**
>   -     * @param wordfile  Complete path to the wordlist
>   -     */
>   -    public static Hashtable getWordtable( String wordfile ) {
>   -	if ( wordfile == null ) {
>   -	    return new Hashtable();
>   -	}
>   -	return getWordtable( new File( wordfile ) );
>   +  /**
>   +   * @param wordfile  Complete path to the wordlist
>   +   */
>   +  public static Hashtable getWordtable(String wordfile) {
>   +    if (wordfile == null) {
>   +      return new Hashtable();
>        }
>   +    return getWordtable(new File(wordfile));
>   +  }
>
>   -    /**
>   -     * @param wordfile  File containing the wordlist
>   -     */
>   -    public static Hashtable getWordtable( File wordfile ) {
>   -	if ( wordfile == null ) {
>   -	    return new Hashtable();
>   -	}
>   -	Hashtable result = null;
>   -	try {
>   -	    LineNumberReader lnr = new LineNumberReader( new FileReader(  
> wordfile ) );
>   -	    String word = null;
>   -	    String[] stopwords = new String[100];
>   -	    int wordcount = 0;
>   -	    while ( ( word = lnr.readLine() ) != null ) {
>   -		wordcount++;
>   -		if ( wordcount == stopwords.length ) {
>   -		    String[] tmp = new String[stopwords.length + 50];
>   -		    System.arraycopy( stopwords, 0, tmp, 0, wordcount );
>   -		    stopwords = tmp;
>   -		}
>   -		stopwords[wordcount-1] = word;
>   -	    }
>   -	    result = makeWordTable( stopwords, wordcount );
>   -	}
>   -	// On error, use an empty table
>   -	catch ( IOException e ) {
>   -	    result = new Hashtable();
>   -	}
>   -	return result;
>   +  /**
>   +   * @param wordfile  File containing the wordlist
>   +   */
>   +  public static Hashtable getWordtable(File wordfile) {
>   +    if (wordfile == null) {
>   +      return new Hashtable();
>        }
>   +    Hashtable result = null;
>   +    try {
>   +      LineNumberReader lnr = new LineNumberReader(new  
> FileReader(wordfile));
>   +      String word = null;
>   +      String[] stopwords = new String[100];
>   +      int wordcount = 0;
>   +      while ((word = lnr.readLine()) != null) {
>   +        wordcount++;
>   +        if (wordcount == stopwords.length) {
>   +          String[] tmp = new String[stopwords.length + 50];
>   +          System.arraycopy(stopwords, 0, tmp, 0, wordcount);
>   +          stopwords = tmp;
>   +        }
>   +        stopwords[wordcount - 1] = word;
>   +      }
>   +      result = makeWordTable(stopwords, wordcount);
>   +    }
>   +// On error, use an empty table
>   +    catch (IOException e) {
>   +      result = new Hashtable();
>   +    }
>   +    return result;
>   +  }
>
>   -    /**
>   -     * Builds the wordlist table.
>   -     *
>   -     * @param words   Word that where read
>   -     * @param length  Amount of words that where read into  
> <tt>words</tt>
>   -     */
>   -    private static Hashtable makeWordTable( String[] words, int  
> length ) {
>   -	Hashtable table = new Hashtable( length );
>   -	for ( int i = 0; i < length; i++ ) {
>   -	    table.put( words[i], words[i] );
>   -	}
>   -	return table;
>   +  /**
>   +   * Builds the wordlist table.
>   +   *
>   +   * @param words   Word that where read
>   +   * @param length  Amount of words that where read into  
> <tt>words</tt>
>   +   */
>   +  private static Hashtable makeWordTable(String[] words, int  
> length) {
>   +    Hashtable table = new Hashtable(length);
>   +    for (int i = 0; i < length; i++) {
>   +      table.put(words[i], words[i]);
>        }
>   +    return table;
>   +  }
>    }
>
>
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
> For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message