lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ehatc...@apache.org
Subject cvs commit: jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl DutchAnalyzer.java DutchStemFilter.java DutchStemmer.java
Date Fri, 12 Mar 2004 15:52:59 GMT
ehatcher    2004/03/12 07:52:59

  Modified:    contributions/analyzers/src/java/org/apache/lucene/analysis/br
                        BrazilianAnalyzer.java BrazilianStemFilter.java
               contributions/analyzers/src/java/org/apache/lucene/analysis/cjk
                        CJKAnalyzer.java
               contributions/analyzers/src/java/org/apache/lucene/analysis/cz
                        CzechAnalyzer.java
               contributions/analyzers/src/java/org/apache/lucene/analysis/fr
                        FrenchAnalyzer.java FrenchStemFilter.java
               contributions/analyzers/src/java/org/apache/lucene/analysis/nl
                        DutchAnalyzer.java DutchStemFilter.java
                        DutchStemmer.java
  Log:
  clean-up based on core changes to StopFilter
  
  Revision  Changes    Path
  1.5       +3 -2      jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
  
  Index: BrazilianAnalyzer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- BrazilianAnalyzer.java	11 Mar 2004 03:05:36 -0000	1.4
  +++ BrazilianAnalyzer.java	12 Mar 2004 15:52:58 -0000	1.5
  @@ -65,6 +65,7 @@
   import java.io.Reader;
   import java.util.Hashtable;
   import java.util.HashSet;
  +import java.util.Set;
   
   /**
    * Analyzer for brazilian language. Supports an external list of stopwords (words that
  @@ -103,11 +104,11 @@
   	/**
   	 * Contains the stopwords used with the StopFilter.
   	 */
  -	private HashSet stoptable = new HashSet();
  +	private Set stoptable = new HashSet();
   	/**
   	 * Contains words that should be indexed but not stemmed.
   	 */
  -	private HashSet excltable = new HashSet();
  +	private Set excltable = new HashSet();
   
   	/**
   	 * Builds an analyzer.
  
  
  
  1.6       +52 -53    jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java
  
  Index: BrazilianStemFilter.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java,v
  retrieving revision 1.5
  retrieving revision 1.6
  diff -u -r1.5 -r1.6
  --- BrazilianStemFilter.java	11 Mar 2004 03:05:36 -0000	1.5
  +++ BrazilianStemFilter.java	12 Mar 2004 15:52:58 -0000	1.6
  @@ -57,72 +57,71 @@
   import org.apache.lucene.analysis.Token;
   import org.apache.lucene.analysis.TokenFilter;
   import org.apache.lucene.analysis.TokenStream;
  +
   import java.io.IOException;
  -import java.util.Hashtable;
   import java.util.HashSet;
  +import java.util.Hashtable;
  +import java.util.Set;
   
   /**
    * Based on (copied) the GermanStemFilter
    *
  - *
  - * @author    João Kramer
  - *
  - *
  - * A filter that stemms german words. It supports a table of words that should
  - * not be stemmed at all.
  - *
  - * @author    Gerhard Schwarz
  + * @author João Kramer
  + *         <p/>
  + *         <p/>
  + *         A filter that stemms german words. It supports a table of words that should
  + *         not be stemmed at all.
  + * @author Gerhard Schwarz
    */
   public final class BrazilianStemFilter extends TokenFilter {
   
  -	/**
  -	 * The actual token in the input stream.
  -	 */
  -	private Token token = null;
  -	private BrazilianStemmer stemmer = null;
  -	private HashSet exclusions = null;
  +  /**
  +   * The actual token in the input stream.
  +   */
  +  private Token token = null;
  +  private BrazilianStemmer stemmer = null;
  +  private Set exclusions = null;
   
  -	public BrazilianStemFilter( TokenStream in ) {
  +  public BrazilianStemFilter(TokenStream in) {
       super(in);
  -		stemmer = new BrazilianStemmer();
  -	}
  +    stemmer = new BrazilianStemmer();
  +  }
   
  -	/**
  -	 * Builds a BrazilianStemFilter that uses an exclusiontable.
  -   * 
  +  /**
  +   * Builds a BrazilianStemFilter that uses an exclusiontable.
  +   *
      * @deprecated
  -	 */
  -	public BrazilianStemFilter( TokenStream in, Hashtable exclusiontable ) {
  -		this( in );
  -		this.exclusions = new HashSet(exclusiontable.keySet());
  -	}
  -
  -	public BrazilianStemFilter( TokenStream in, HashSet exclusiontable ) {
  -		this( in );
  -		this.exclusions = exclusiontable;
  -	}
  -
  -	/**
  -	 * @return  Returns the next token in the stream, or null at EOS.
  -	 */
  -	public final Token next()
  -		throws IOException {
  -		if ( ( token = input.next() ) == null ) {
  -			return null;
  -		}
  -		// Check the exclusiontable.
  -		else if ( exclusions != null && exclusions.contains( token.termText() ) ) {
  -			return token;
  -		}
  -		else {
  -			String s = stemmer.stem( token.termText() );
  -			// If not stemmed, dont waste the time creating a new token.
  -			if ( (s != null) && !s.equals( token.termText() ) ) {
  -				return new Token( s, 0, s.length(), token.type() );
  -			}
  -			return token;
  -		}
  -	}
  +   */
  +  public BrazilianStemFilter(TokenStream in, Hashtable exclusiontable) {
  +    this(in);
  +    this.exclusions = new HashSet(exclusiontable.keySet());
  +  }
  +
  +  public BrazilianStemFilter(TokenStream in, Set exclusiontable) {
  +    this(in);
  +    this.exclusions = exclusiontable;
  +  }
  +
  +  /**
  +   * @return Returns the next token in the stream, or null at EOS.
  +   */
  +  public final Token next()
  +      throws IOException {
  +    if ((token = input.next()) == null) {
  +      return null;
  +    }
  +    // Check the exclusiontable.
  +    else if (exclusions != null && exclusions.contains(token.termText())) {
  +      return token;
  +    } else {
  +      String s = stemmer.stem(token.termText());
  +      // If not stemmed, dont waste the time creating a new token.
  +      if ((s != null) && !s.equals(token.termText())) {
  +        return new Token(s, 0, s.length(), token.type());
  +      }
  +      return token;
  +    }
  +  }
   }
   
   
  
  
  
  1.4       +55 -56    jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
  
  Index: CJKAnalyzer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- CJKAnalyzer.java	11 Mar 2004 03:05:36 -0000	1.3
  +++ CJKAnalyzer.java	12 Mar 2004 15:52:58 -0000	1.4
  @@ -61,9 +61,7 @@
   import org.apache.lucene.analysis.TokenStream;
   
   import java.io.Reader;
  -
  -import java.util.Hashtable;
  -import java.util.HashSet;
  +import java.util.Set;
   
   
   /**
  @@ -72,57 +70,58 @@
    * @author Che, Dong
    */
   public class CJKAnalyzer extends Analyzer {
  -    //~ Static fields/initializers ---------------------------------------------
  +  //~ Static fields/initializers ---------------------------------------------
   
  -    /**
  -     * An array containing some common English words that are not usually
  -     * useful for searching. and some double-byte interpunctions.....
  -     */
  -    private static String[] stopWords = {
  -                                            "a", "and", "are", "as", "at", "be",
  -                                            "but", "by", "for", "if", "in",
  -                                            "into", "is", "it", "no", "not",
  -                                            "of", "on", "or", "s", "such", "t",
  -                                            "that", "the", "their", "then",
  -                                            "there", "these", "they", "this",
  -                                            "to", "was", "will", "with", "",
  -                                            "www"
  -                                        };
  -
  -    //~ Instance fields --------------------------------------------------------
  -
  -    /** stop word list */
  -    private HashSet stopTable;
  -
  -    //~ Constructors -----------------------------------------------------------
  -
  -    /**
  -     * Builds an analyzer which removes words in STOP_WORDS.
  -     */
  -    public CJKAnalyzer() {
  -        stopTable = StopFilter.makeStopSet(stopWords);
  -    }
  -
  -    /**
  -     * Builds an analyzer which removes words in the provided array.
  -     *
  -     * @param stopWords stop word array
  -     */
  -    public CJKAnalyzer(String[] stopWords) {
  -        stopTable = StopFilter.makeStopSet(stopWords);
  -    }
  -
  -    //~ Methods ----------------------------------------------------------------
  -
  -    /**
  -     * get token stream from input
  -     *
  -     * @param fieldName lucene field name
  -     * @param reader input reader
  -     *
  -     * @return TokenStream
  -     */
  -    public final TokenStream tokenStream(String fieldName, Reader reader) {
  -        return new StopFilter(new CJKTokenizer(reader), stopTable);
  -    }
  +  /**
  +   * An array containing some common English words that are not usually
  +   * useful for searching. and some double-byte interpunctions.....
  +   */
  +  private static String[] stopWords = {
  +    "a", "and", "are", "as", "at", "be",
  +    "but", "by", "for", "if", "in",
  +    "into", "is", "it", "no", "not",
  +    "of", "on", "or", "s", "such", "t",
  +    "that", "the", "their", "then",
  +    "there", "these", "they", "this",
  +    "to", "was", "will", "with", "",
  +    "www"
  +  };
  +
  +  //~ Instance fields --------------------------------------------------------
  +
  +  /**
  +   * stop word list
  +   */
  +  private Set stopTable;
  +
  +  //~ Constructors -----------------------------------------------------------
  +
  +  /**
  +   * Builds an analyzer which removes words in STOP_WORDS.
  +   */
  +  public CJKAnalyzer() {
  +    stopTable = StopFilter.makeStopSet(stopWords);
  +  }
  +
  +  /**
  +   * Builds an analyzer which removes words in the provided array.
  +   *
  +   * @param stopWords stop word array
  +   */
  +  public CJKAnalyzer(String[] stopWords) {
  +    stopTable = StopFilter.makeStopSet(stopWords);
  +  }
  +
  +  //~ Methods ----------------------------------------------------------------
  +
  +  /**
  +   * get token stream from input
  +   *
  +   * @param fieldName lucene field name
  +   * @param reader    input reader
  +   * @return TokenStream
  +   */
  +  public final TokenStream tokenStream(String fieldName, Reader reader) {
  +    return new StopFilter(new CJKTokenizer(reader), stopTable);
  +  }
   }
  
  
  
  1.4       +2 -1      jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
  
  Index: CzechAnalyzer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- CzechAnalyzer.java	11 Mar 2004 03:05:36 -0000	1.3
  +++ CzechAnalyzer.java	12 Mar 2004 15:52:58 -0000	1.4
  @@ -65,6 +65,7 @@
   import java.io.*;
   import java.util.Hashtable;
   import java.util.HashSet;
  +import java.util.Set;
   
   /**
    * Analyzer for Czech language. Supports an external list of stopwords (words that
  @@ -103,7 +104,7 @@
   	/**
   	 * Contains the stopwords used with the StopFilter.
   	 */
  -	private HashSet stoptable;
  +	private Set stoptable;
   
   	/**
   	 * Builds an analyzer.
  
  
  
  1.5       +110 -107  jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
  
  Index: FrenchAnalyzer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- FrenchAnalyzer.java	11 Mar 2004 03:05:36 -0000	1.4
  +++ FrenchAnalyzer.java	12 Mar 2004 15:52:58 -0000	1.5
  @@ -58,14 +58,15 @@
   import org.apache.lucene.analysis.LowerCaseFilter;
   import org.apache.lucene.analysis.StopFilter;
   import org.apache.lucene.analysis.TokenStream;
  +import org.apache.lucene.analysis.de.WordlistLoader;
   import org.apache.lucene.analysis.standard.StandardFilter;
   import org.apache.lucene.analysis.standard.StandardTokenizer;
  +
   import java.io.File;
   import java.io.Reader;
  -import java.util.Hashtable;
   import java.util.HashSet;
  -
  -import org.apache.lucene.analysis.de.WordlistLoader;
  +import java.util.Hashtable;
  +import java.util.Set;
   
   /**
    * Analyzer for french language. Supports an external list of stopwords (words that
  @@ -74,115 +75,117 @@
    * A default set of stopwords is used unless an other list is specified, the
    * exclusionlist is empty by default.
    *
  - * @author    Patrick Talbot (based on Gerhard Schwarz work for German)
  - * @version   $Id$
  + * @author Patrick Talbot (based on Gerhard Schwarz work for German)
  + * @version $Id$
    */
   public final class FrenchAnalyzer extends Analyzer {
   
  -	/**
  -	 * Extended list of typical french stopwords.
  -	 */
  -	private String[] FRENCH_STOP_WORDS = {
  -		"a", "afin", "ai", "ainsi", "après", "attendu", "au", "aujourd", "auquel", "aussi",
  -		"autre", "autres", "aux", "auxquelles", "auxquels", "avait", "avant", "avec", "avoir",
  -		"c", "car", "ce", "ceci", "cela", "celle", "celles", "celui", "cependant", "certain",
  -		"certaine", "certaines", "certains", "ces", "cet", "cette", "ceux", "chez", "ci",
  -		"combien", "comme", "comment", "concernant", "contre", "d", "dans", "de", "debout",
  -		"dedans", "dehors", "delà", "depuis", "derrière", "des", "désormais", "desquelles",
  -		"desquels", "dessous", "dessus", "devant", "devers", "devra", "divers", "diverse",
  -		"diverses", "doit", "donc", "dont", "du", "duquel", "durant", "dès", "elle", "elles",
  -		"en", "entre", "environ", "est", "et", "etc", "etre", "eu", "eux", "excepté", "hormis",
  -		"hors", "hélas", "hui", "il", "ils", "j", "je", "jusqu", "jusque", "l", "la", "laquelle",
  -		"le", "lequel", "les", "lesquelles", "lesquels", "leur", "leurs", "lorsque", "lui", "là",
  -		"ma", "mais", "malgré", "me", "merci", "mes", "mien", "mienne", "miennes", "miens", "moi",
  -		"moins", "mon", "moyennant", "même", "mêmes", "n", "ne", "ni", "non", "nos", "notre",
  -		"nous", "néanmoins", "nôtre", "nôtres", "on", "ont", "ou", "outre", "où", "par", "parmi",
  -		"partant", "pas", "passé", "pendant", "plein", "plus", "plusieurs", "pour", "pourquoi",
  -		"proche", "près", "puisque", "qu", "quand", "que", "quel", "quelle", "quelles", "quels",
  -		"qui", "quoi", "quoique", "revoici", "revoilà", "s", "sa", "sans", "sauf", "se", "selon",
  -		"seront", "ses", "si", "sien", "sienne", "siennes", "siens", "sinon", "soi", "soit",
  -		"son", "sont", "sous", "suivant", "sur", "ta", "te", "tes", "tien", "tienne", "tiennes",
  -		"tiens", "toi", "ton", "tous", "tout", "toute", "toutes", "tu", "un", "une", "va", "vers",
  -		"voici", "voilà", "vos", "votre", "vous", "vu", "vôtre", "vôtres", "y", "à", "ça", "ès",
  -		"été", "être", "ô"
  -	};
  -
  -	/**
  -	 * Contains the stopwords used with the StopFilter.
  -	 */
  -	private HashSet stoptable = new HashSet();
  -	/**
  -	 * Contains words that should be indexed but not stemmed.
  -	 */
  -	private HashSet excltable = new HashSet();
  -
  -	/**
  -	 * Builds an analyzer.
  -	 */
  -	public FrenchAnalyzer() {
  -		stoptable = StopFilter.makeStopSet( FRENCH_STOP_WORDS );
  -	}
  -
  -	/**
  -	 * Builds an analyzer with the given stop words.
  -	 */
  -	public FrenchAnalyzer( String[] stopwords ) {
  -		stoptable = StopFilter.makeStopSet( stopwords );
  -	}
  +  /**
  +   * Extended list of typical french stopwords.
  +   */
  +  private String[] FRENCH_STOP_WORDS = {
  +    "a", "afin", "ai", "ainsi", "après", "attendu", "au", "aujourd", "auquel", "aussi",
  +    "autre", "autres", "aux", "auxquelles", "auxquels", "avait", "avant", "avec", "avoir",
  +    "c", "car", "ce", "ceci", "cela", "celle", "celles", "celui", "cependant", "certain",
  +    "certaine", "certaines", "certains", "ces", "cet", "cette", "ceux", "chez", "ci",
  +    "combien", "comme", "comment", "concernant", "contre", "d", "dans", "de", "debout",
  +    "dedans", "dehors", "delà", "depuis", "derrière", "des", "désormais", "desquelles",
  +    "desquels", "dessous", "dessus", "devant", "devers", "devra", "divers", "diverse",
  +    "diverses", "doit", "donc", "dont", "du", "duquel", "durant", "dès", "elle", "elles",
  +    "en", "entre", "environ", "est", "et", "etc", "etre", "eu", "eux", "excepté", "hormis",
  +    "hors", "hélas", "hui", "il", "ils", "j", "je", "jusqu", "jusque", "l", "la", "laquelle",
  +    "le", "lequel", "les", "lesquelles", "lesquels", "leur", "leurs", "lorsque", "lui", "là",
  +    "ma", "mais", "malgré", "me", "merci", "mes", "mien", "mienne", "miennes", "miens", "moi",
  +    "moins", "mon", "moyennant", "même", "mêmes", "n", "ne", "ni", "non", "nos", "notre",
  +    "nous", "néanmoins", "nôtre", "nôtres", "on", "ont", "ou", "outre", "où", "par", "parmi",
  +    "partant", "pas", "passé", "pendant", "plein", "plus", "plusieurs", "pour", "pourquoi",
  +    "proche", "près", "puisque", "qu", "quand", "que", "quel", "quelle", "quelles", "quels",
  +    "qui", "quoi", "quoique", "revoici", "revoilà", "s", "sa", "sans", "sauf", "se", "selon",
  +    "seront", "ses", "si", "sien", "sienne", "siennes", "siens", "sinon", "soi", "soit",
  +    "son", "sont", "sous", "suivant", "sur", "ta", "te", "tes", "tien", "tienne", "tiennes",
  +    "tiens", "toi", "ton", "tous", "tout", "toute", "toutes", "tu", "un", "une", "va", "vers",
  +    "voici", "voilà", "vos", "votre", "vous", "vu", "vôtre", "vôtres", "y", "à", "ça", "ès",
  +    "été", "être", "ô"
  +  };
  +
  +  /**
  +   * Contains the stopwords used with the StopFilter.
  +   */
  +  private Set stoptable = new HashSet();
  +  /**
  +   * Contains words that should be indexed but not stemmed.
  +   */
  +  private Set excltable = new HashSet();
  +
  +  /**
  +   * Builds an analyzer.
  +   */
  +  public FrenchAnalyzer() {
  +    stoptable = StopFilter.makeStopSet(FRENCH_STOP_WORDS);
  +  }
  +
  +  /**
  +   * Builds an analyzer with the given stop words.
  +   */
  +  public FrenchAnalyzer(String[] stopwords) {
  +    stoptable = StopFilter.makeStopSet(stopwords);
  +  }
   
  -	/**
  -	 * Builds an analyzer with the given stop words.
  +  /**
  +   * Builds an analyzer with the given stop words.
      *
      * @deprecated
  -	 */
  -	public FrenchAnalyzer( Hashtable stopwords ) {
  -		stoptable = new HashSet(stopwords.keySet());
  -	}
  -
  -	/**
  -	 * Builds an analyzer with the given stop words.
  -	 */
  -	public FrenchAnalyzer( File stopwords ) {
  -		stoptable = new HashSet(WordlistLoader.getWordtable( stopwords ).keySet());
  -	}
  -
  -	/**
  -	 * Builds an exclusionlist from an array of Strings.
  -	 */
  -	public void setStemExclusionTable( String[] exclusionlist ) {
  -		excltable = StopFilter.makeStopSet( exclusionlist );
  -	}
  -	/**
  -	 * Builds an exclusionlist from a Hashtable.
  -	 */
  -	public void setStemExclusionTable( Hashtable exclusionlist ) {
  -		excltable = new HashSet(exclusionlist.keySet());
  -	}
  -	/**
  -	 * Builds an exclusionlist from the words contained in the given file.
  -	 */
  -	public void setStemExclusionTable( File exclusionlist ) {
  -		excltable = new HashSet(WordlistLoader.getWordtable( exclusionlist ).keySet());
  -	}
  -
  -	/**
  -	 * Creates a TokenStream which tokenizes all the text in the provided Reader.
  -	 *
  -	 * @return  A TokenStream build from a StandardTokenizer filtered with
  -	 * 			StandardFilter, StopFilter, FrenchStemFilter and LowerCaseFilter
  -	 */
  -	public final TokenStream tokenStream( String fieldName, Reader reader ) {
  -		
  -		if (fieldName==null) throw new IllegalArgumentException("fieldName must not be null");
  -		if (reader==null) throw new IllegalArgumentException("readermust not be null");
  -				
  -		TokenStream result = new StandardTokenizer( reader );
  -		result = new StandardFilter( result );
  -		result = new StopFilter( result, stoptable );
  -		result = new FrenchStemFilter( result, excltable );
  -		// Convert to lowercase after stemming!
  -		result = new LowerCaseFilter( result );
  -		return result;
  -	}
  +   */
  +  public FrenchAnalyzer(Hashtable stopwords) {
  +    stoptable = new HashSet(stopwords.keySet());
  +  }
  +
  +  /**
  +   * Builds an analyzer with the given stop words.
  +   */
  +  public FrenchAnalyzer(File stopwords) {
  +    stoptable = new HashSet(WordlistLoader.getWordtable(stopwords).keySet());
  +  }
  +
  +  /**
  +   * Builds an exclusionlist from an array of Strings.
  +   */
  +  public void setStemExclusionTable(String[] exclusionlist) {
  +    excltable = StopFilter.makeStopSet(exclusionlist);
  +  }
  +
  +  /**
  +   * Builds an exclusionlist from a Hashtable.
  +   */
  +  public void setStemExclusionTable(Hashtable exclusionlist) {
  +    excltable = new HashSet(exclusionlist.keySet());
  +  }
  +
  +  /**
  +   * Builds an exclusionlist from the words contained in the given file.
  +   */
  +  public void setStemExclusionTable(File exclusionlist) {
  +    excltable = new HashSet(WordlistLoader.getWordtable(exclusionlist).keySet());
  +  }
  +
  +  /**
  +   * Creates a TokenStream which tokenizes all the text in the provided Reader.
  +   *
  +   * @return A TokenStream build from a StandardTokenizer filtered with
  +   *         StandardFilter, StopFilter, FrenchStemFilter and LowerCaseFilter
  +   */
  +  public final TokenStream tokenStream(String fieldName, Reader reader) {
  +
  +    if (fieldName == null) throw new IllegalArgumentException("fieldName must not be null");
  +    if (reader == null) throw new IllegalArgumentException("readermust not be null");
  +
  +    TokenStream result = new StandardTokenizer(reader);
  +    result = new StandardFilter(result);
  +    result = new StopFilter(result, stoptable);
  +    result = new FrenchStemFilter(result, excltable);
  +    // Convert to lowercase after stemming!
  +    result = new LowerCaseFilter(result);
  +    return result;
  +  }
   }
   
  
  
  
  1.4       +3 -2      jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java
  
  Index: FrenchStemFilter.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- FrenchStemFilter.java	11 Mar 2004 03:05:36 -0000	1.3
  +++ FrenchStemFilter.java	12 Mar 2004 15:52:59 -0000	1.4
  @@ -60,6 +60,7 @@
   import java.io.IOException;
   import java.util.Hashtable;
   import java.util.HashSet;
  +import java.util.Set;
   
   /**
    * A filter that stemms french words. It supports a table of words that should
  @@ -75,7 +76,7 @@
   	 */
   	private Token token = null;
   	private FrenchStemmer stemmer = null;
  -	private HashSet exclusions = null;
  +	private Set exclusions = null;
   
   	public FrenchStemFilter( TokenStream in ) {
       super(in);
  @@ -92,7 +93,7 @@
   		exclusions = new HashSet(exclusiontable.keySet());
   	}
   
  -	public FrenchStemFilter( TokenStream in, HashSet exclusiontable ) {
  +	public FrenchStemFilter( TokenStream in, Set exclusiontable ) {
   		this( in );
   		exclusions = exclusiontable;
   	}
  
  
  
  1.3       +5 -3      jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
  
  Index: DutchAnalyzer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- DutchAnalyzer.java	11 Mar 2004 03:05:36 -0000	1.2
  +++ DutchAnalyzer.java	12 Mar 2004 15:52:59 -0000	1.3
  @@ -26,6 +26,8 @@
   import java.io.Reader;
   import java.util.HashMap;
   import java.util.HashSet;
  +import java.util.Set;
  +import java.util.Map;
   
   /**
    * @author Edwin de Jonge
  @@ -61,14 +63,14 @@
     /**
      * Contains the stopwords used with the StopFilter.
      */
  -  private HashSet stoptable = new HashSet();
  +  private Set stoptable = new HashSet();
   
     /**
      * Contains words that should be indexed but not stemmed.
      */
  -  private HashSet excltable = new HashSet();
  +  private Set excltable = new HashSet();
   
  -  private HashMap _stemdict = new HashMap();
  +  private Map _stemdict = new HashMap();
   
   
     /**
  
  
  
  1.3       +5 -3      jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java
  
  Index: DutchStemFilter.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- DutchStemFilter.java	11 Mar 2004 03:05:36 -0000	1.2
  +++ DutchStemFilter.java	12 Mar 2004 15:52:59 -0000	1.3
  @@ -23,6 +23,8 @@
   import java.io.IOException;
   import java.util.HashMap;
   import java.util.HashSet;
  +import java.util.Set;
  +import java.util.Map;
   
   /**
    * @author Edwin de Jonge
  @@ -37,7 +39,7 @@
      */
     private Token token = null;
     private DutchStemmer stemmer = null;
  -  private HashSet exclusions = null;
  +  private Set exclusions = null;
   
     public DutchStemFilter(TokenStream _in) {
       super(_in);
  @@ -47,7 +49,7 @@
     /**
      * Builds a DutchStemFilter that uses an exclusiontable.
      */
  -  public DutchStemFilter(TokenStream _in, HashSet exclusiontable) {
  +  public DutchStemFilter(TokenStream _in, Set exclusiontable) {
       this(_in);
       exclusions = exclusiontable;
     }
  @@ -55,7 +57,7 @@
     /**
      * @param stemdictionary Dictionary of word stem pairs, that overrule the algorithm
      */
  -  public DutchStemFilter(TokenStream _in, HashSet exclusiontable, HashMap stemdictionary) {
  +  public DutchStemFilter(TokenStream _in, Set exclusiontable, Map stemdictionary) {
       this(_in, exclusiontable);
       stemmer.setStemDictionary(stemdictionary);
     }
  
  
  
  1.3       +3 -3      jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl/DutchStemmer.java
  
  Index: DutchStemmer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl/DutchStemmer.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- DutchStemmer.java	11 Mar 2004 03:05:36 -0000	1.2
  +++ DutchStemmer.java	12 Mar 2004 15:52:59 -0000	1.3
  @@ -16,7 +16,7 @@
    * limitations under the License.
    */
   
  -import java.util.HashMap;
  +import java.util.Map;
   
   /*
    * @author Edwin de Jonge (ejne@cbs.nl)
  @@ -32,7 +32,7 @@
      */
     private StringBuffer sb = new StringBuffer();
     private boolean _removedE;
  -  private HashMap _stemDict;
  +  private Map _stemDict;
   
     private int _R1;
     private int _R2;
  @@ -399,7 +399,7 @@
       return false;
     }
   
  -  void setStemDictionary(HashMap dict) {
  +  void setStemDictionary(Map dict) {
       _stemDict = dict;
     }
   
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message