lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ehatc...@apache.org
Subject cvs commit: jakarta-lucene/src/java/org/apache/lucene/analysis/de GermanAnalyzer.java
Date Fri, 12 Mar 2004 09:45:17 GMT
ehatcher    2004/03/12 01:45:17

  Modified:    src/java/org/apache/lucene/analysis/de GermanAnalyzer.java
  Log:
  format clean-up
  
  Revision  Changes    Path
  1.9       +96 -104   jakarta-lucene/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
  
  Index: GermanAnalyzer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java,v
  retrieving revision 1.8
  retrieving revision 1.9
  diff -u -r1.8 -r1.9
  --- GermanAnalyzer.java	12 Mar 2004 09:43:48 -0000	1.8
  +++ GermanAnalyzer.java	12 Mar 2004 09:45:17 -0000	1.9
  @@ -59,11 +59,12 @@
   import org.apache.lucene.analysis.TokenStream;
   import org.apache.lucene.analysis.standard.StandardFilter;
   import org.apache.lucene.analysis.standard.StandardTokenizer;
  +
   import java.io.File;
   import java.io.Reader;
  +import java.util.HashSet;
   import java.util.Hashtable;
   import java.util.Set;
  -import java.util.HashSet;
   
   /**
    * Analyzer for German language. Supports an external list of stopwords (words that
  @@ -72,108 +73,99 @@
    * A default set of stopwords is used unless an alternative list is specified, the
    * exclusion list is empty by default.
    *
  - * @author    Gerhard Schwarz
  - * @version   $Id$
  + * @author Gerhard Schwarz
  + * @version $Id$
    */
  -public class GermanAnalyzer extends Analyzer
  -{
  -    /**
  -     * List of typical german stopwords.
  -     */
  -    private String[] GERMAN_STOP_WORDS = {
  -	"einer", "eine", "eines", "einem", "einen",
  -	"der", "die", "das", "dass", "daß",
  -	"du", "er", "sie", "es",
  -	"was", "wer", "wie", "wir",
  -	"und", "oder", "ohne", "mit",
  -	"am", "im", "in", "aus", "auf",
  -	"ist", "sein", "war", "wird",
  -	"ihr", "ihre", "ihres",
  -	"als", "für", "von", "mit",
  -	"dich", "dir", "mich", "mir",
  -	"mein", "sein", "kein",
  -	"durch", "wegen", "wird"
  -    };
  -
  -    /**
  -     * Contains the stopwords used with the StopFilter.
  -     */
  -    private Set stopSet = new HashSet();
  -
  -    /**
  -     * Contains words that should be indexed but not stemmed.
  -     */
  -    private Set exclusionSet = new HashSet();
  -
  -    /**
  -     * Builds an analyzer.
  -     */
  -    public GermanAnalyzer()
  -    {
  -	stopSet = StopFilter.makeStopSet( GERMAN_STOP_WORDS );
  -    }
  -
  -    /**
  -     * Builds an analyzer with the given stop words.
  -     */
  -    public GermanAnalyzer( String[] stopwords )
  -    {
  -	stopSet = StopFilter.makeStopSet( stopwords );
  -    }
  -
  -    /**
  -     * Builds an analyzer with the given stop words.
  -     */
  -    public GermanAnalyzer( Hashtable stopwords )
  -    {
  -	stopSet = new HashSet(stopwords.keySet());
  -    }
  -
  -    /**
  -     * Builds an analyzer with the given stop words.
  -     */
  -    public GermanAnalyzer( File stopwords )
  -    {
  -	stopSet = new HashSet(WordlistLoader.getWordtable( stopwords ).keySet());
  -    }
  -
  -    /**
  -     * Builds an exclusionlist from an array of Strings.
  -     */
  -    public void setStemExclusionTable( String[] exclusionlist )
  -    {
  -	exclusionSet = StopFilter.makeStopSet( exclusionlist );
  -    }
  -
  -    /**
  -     * Builds an exclusionlist from a Hashtable.
  -     */
  -    public void setStemExclusionTable( Hashtable exclusionlist )
  -    {
  -	exclusionSet = new HashSet(exclusionlist.keySet());
  -    }
  -
  -    /**
  -     * Builds an exclusionlist from the words contained in the given file.
  -     */
  -    public void setStemExclusionTable( File exclusionlist )
  -    {
  -	exclusionSet = new HashSet(WordlistLoader.getWordtable( exclusionlist ).keySet());
  -    }
  -
  -    /**
  -     * Creates a TokenStream which tokenizes all the text in the provided Reader.
  -     *
  -     * @return  A TokenStream build from a StandardTokenizer filtered with
  -     *		StandardFilter, StopFilter, GermanStemFilter
  -     */
  -    public TokenStream tokenStream( String fieldName, Reader reader )
  -    {
  -	TokenStream result = new StandardTokenizer( reader );
  -	result = new StandardFilter( result );
  -  // shouldn't there be a lowercaser before stop word filtering?
  -  result = new StopFilter( result, stopSet );
  -	result = new GermanStemFilter( result, exclusionSet );
  -	return result;
  -    }
  +public class GermanAnalyzer extends Analyzer {
  +  /**
  +   * List of typical german stopwords.
  +   */
  +  private String[] GERMAN_STOP_WORDS = {
  +    "einer", "eine", "eines", "einem", "einen",
  +    "der", "die", "das", "dass", "daß",
  +    "du", "er", "sie", "es",
  +    "was", "wer", "wie", "wir",
  +    "und", "oder", "ohne", "mit",
  +    "am", "im", "in", "aus", "auf",
  +    "ist", "sein", "war", "wird",
  +    "ihr", "ihre", "ihres",
  +    "als", "für", "von", "mit",
  +    "dich", "dir", "mich", "mir",
  +    "mein", "sein", "kein",
  +    "durch", "wegen", "wird"
  +  };
  +
  +  /**
  +   * Contains the stopwords used with the StopFilter.
  +   */
  +  private Set stopSet = new HashSet();
  +
  +  /**
  +   * Contains words that should be indexed but not stemmed.
  +   */
  +  private Set exclusionSet = new HashSet();
  +
  +  /**
  +   * Builds an analyzer.
  +   */
  +  public GermanAnalyzer() {
  +    stopSet = StopFilter.makeStopSet(GERMAN_STOP_WORDS);
  +  }
  +
  +  /**
  +   * Builds an analyzer with the given stop words.
  +   */
  +  public GermanAnalyzer(String[] stopwords) {
  +    stopSet = StopFilter.makeStopSet(stopwords);
  +  }
  +
  +  /**
  +   * Builds an analyzer with the given stop words.
  +   */
  +  public GermanAnalyzer(Hashtable stopwords) {
  +    stopSet = new HashSet(stopwords.keySet());
  +  }
  +
  +  /**
  +   * Builds an analyzer with the given stop words.
  +   */
  +  public GermanAnalyzer(File stopwords) {
  +    stopSet = new HashSet(WordlistLoader.getWordtable(stopwords).keySet());
  +  }
  +
  +  /**
  +   * Builds an exclusionlist from an array of Strings.
  +   */
  +  public void setStemExclusionTable(String[] exclusionlist) {
  +    exclusionSet = StopFilter.makeStopSet(exclusionlist);
  +  }
  +
  +  /**
  +   * Builds an exclusionlist from a Hashtable.
  +   */
  +  public void setStemExclusionTable(Hashtable exclusionlist) {
  +    exclusionSet = new HashSet(exclusionlist.keySet());
  +  }
  +
  +  /**
  +   * Builds an exclusionlist from the words contained in the given file.
  +   */
  +  public void setStemExclusionTable(File exclusionlist) {
  +    exclusionSet = new HashSet(WordlistLoader.getWordtable(exclusionlist).keySet());
  +  }
  +
  +  /**
  +   * Creates a TokenStream which tokenizes all the text in the provided Reader.
  +   *
  +   * @return A TokenStream built from a StandardTokenizer filtered with
  +   *         StandardFilter, StopFilter, GermanStemFilter
  +   */
  +  public TokenStream tokenStream(String fieldName, Reader reader) {
  +    TokenStream result = new StandardTokenizer(reader);
  +    result = new StandardFilter(result);
  +// shouldn't there be a lowercaser before stop word filtering?
  +    result = new StopFilter(result, stopSet);
  +    result = new GermanStemFilter(result, exclusionSet);
  +    return result;
  +  }
   }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message