lucenenet-commits mailing list archives

From: ccurr...@apache.org
Subject: [Lucene.Net] svn commit: r1294875 [3/45] - in /incubator/lucene.net/trunk: ./ build/ build/vs2010/contrib/ build/vs2010/test/ doc/ src/ src/contrib/Analyzers/ src/contrib/Analyzers/AR/ src/contrib/Analyzers/BR/ src/contrib/Analyzers/CJK/ src/contrib/Analyzers/Cn/ s...
Date: Tue, 28 Feb 2012 22:43:28 GMT
Modified: incubator/lucene.net/trunk/src/contrib/Analyzers/Cz/CzechAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Cz/CzechAnalyzer.cs?rev=1294875&r1=1294874&r2=1294875&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Cz/CzechAnalyzer.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Cz/CzechAnalyzer.cs Tue Feb 28 22:43:08 2012
@@ -1,25 +1,22 @@
-/*
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 using System;
+using System.Collections.Generic;
 using System.IO;
 using System.Text;
 using System.Collections;
@@ -27,186 +24,199 @@ using System.Collections;
 using Lucene.Net.Analysis;
 using Lucene.Net.Analysis.De;
 using Lucene.Net.Analysis.Standard;
+using Version = Lucene.Net.Util.Version;
 
 namespace Lucene.Net.Analysis.Cz
 {
-	/* ====================================================================
-	 * The Apache Software License, Version 1.1
-	 *
-	 * Copyright (c) 2004 The Apache Software Foundation.  All rights
-	 * reserved.
-	 *
-	 * Redistribution and use in source and binary forms, with or without
-	 * modification, are permitted provided that the following conditions
-	 * are met:
-	 *
-	 * 1. Redistributions of source code must retain the above copyright
-	 *    notice, this list of conditions and the following disclaimer.
-	 *
-	 * 2. Redistributions in binary form must reproduce the above copyright
-	 *    notice, this list of conditions and the following disclaimer in
-	 *    the documentation and/or other materials provided with the
-	 *    distribution.
-	 *
-	 * 3. The end-user documentation included with the redistribution,
-	 *    if any, must include the following acknowledgment:
-	 *       "This product includes software developed by the
-	 *        Apache Software Foundation (http://www.apache.org/)."
-	 *    Alternately, this acknowledgment may appear in the software itself,
-	 *    if and wherever such third-party acknowledgments normally appear.
-	 *
-	 * 4. The names "Apache" and "Apache Software Foundation" and
-	 *    "Apache Lucene" must not be used to endorse or promote products
-	 *    derived from this software without prior written permission. For
-	 *    written permission, please contact apache@apache.org.
-	 *
-	 * 5. Products derived from this software may not be called "Apache",
-	 *    "Apache Lucene", nor may "Apache" appear in their name, without
-	 *    prior written permission of the Apache Software Foundation.
-	 *
-	 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
-	 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-	 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-	 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
-	 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-	 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-	 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-	 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-	 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-	 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-	 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-	 * SUCH DAMAGE.
-	 * ====================================================================
+/**
+ * {@link Analyzer} for Czech language. 
+ * <p>
+ * Supports an external list of stopwords (words that
+ * will not be indexed at all). 
+ * A default set of stopwords is used unless an alternative list is specified.
+ * </p>
+ *
+ * <p><b>NOTE</b>: This class uses the same {@link Version}
+ * dependent settings as {@link StandardAnalyzer}.</p>
+ */
+public sealed class CzechAnalyzer : Analyzer {
+
+	/**
+	 * List of typical stopwords.
+	 * @deprecated use {@link #getDefaultStopSet()} instead
+	 */
+  // TODO make this private in 3.1
+	public static readonly String[] CZECH_STOP_WORDS = {
+        "a","s","k","o","i","u","v","z","dnes","cz","t\u00edmto","bude\u0161","budem",
+        "byli","jse\u0161","m\u016fj","sv\u00fdm","ta","tomto","tohle","tuto","tyto",
+        "jej","zda","pro\u010d","m\u00e1te","tato","kam","tohoto","kdo","kte\u0159\u00ed",
+        "mi","n\u00e1m","tom","tomuto","m\u00edt","nic","proto","kterou","byla",
+        "toho","proto\u017ee","asi","ho","na\u0161i","napi\u0161te","re","co\u017e","t\u00edm",
+        "tak\u017ee","sv\u00fdch","jej\u00ed","sv\u00fdmi","jste","aj","tu","tedy","teto",
+        "bylo","kde","ke","prav\u00e9","ji","nad","nejsou","\u010di","pod","t\u00e9ma",
+        "mezi","p\u0159es","ty","pak","v\u00e1m","ani","kdy\u017e","v\u0161ak","neg","jsem",
+        "tento","\u010dl\u00e1nku","\u010dl\u00e1nky","aby","jsme","p\u0159ed","pta","jejich",
+        "byl","je\u0161t\u011b","a\u017e","bez","tak\u00e9","pouze","prvn\u00ed","va\u0161e","kter\u00e1",
+        "n\u00e1s","nov\u00fd","tipy","pokud","m\u016f\u017ee","strana","jeho","sv\u00e9","jin\u00e9",
+        "zpr\u00e1vy","nov\u00e9","nen\u00ed","v\u00e1s","jen","podle","zde","u\u017e","b\u00fdt","v\u00edce",
+        "bude","ji\u017e","ne\u017e","kter\u00fd","by","kter\u00e9","co","nebo","ten","tak",
+        "m\u00e1","p\u0159i","od","po","jsou","jak","dal\u0161\u00ed","ale","si","se","ve",
+        "to","jako","za","zp\u011bt","ze","do","pro","je","na","atd","atp",
+        "jakmile","p\u0159i\u010dem\u017e","j\u00e1","on","ona","ono","oni","ony","my","vy",
+        "j\u00ed","ji","m\u011b","mne","jemu","tomu","t\u011bm","t\u011bmu","n\u011bmu","n\u011bmu\u017e",
+        "jeho\u017e","j\u00ed\u017e","jeliko\u017e","je\u017e","jako\u017e","na\u010de\u017e",
+    };
+	
+	/**
+	 * Returns a set of default Czech-stopwords 
+	 * @return a set of default Czech-stopwords 
+	 */
+	public static ISet<string> getDefaultStopSet(){
+	  return DefaultSetHolder.DEFAULT_SET;
+	}
+	
+	private static class DefaultSetHolder {
+	  internal static ISet<string> DEFAULT_SET = CharArraySet.UnmodifiableSet(new CharArraySet(
+	      CZECH_STOP_WORDS, false));
+	}
+
+	/**
+	 * Contains the stopwords used with the {@link StopFilter}.
+	 */
+	// TODO make this final in 3.1
+	private ISet<string> stoptable;
+  private readonly Version matchVersion;
+
+	/**
+	 * Builds an analyzer with the default stop words ({@link #CZECH_STOP_WORDS}).
+	 */
+	public CzechAnalyzer(Version matchVersion) 
+    : this(matchVersion, DefaultSetHolder.DEFAULT_SET)
+    {
+    
+	}
+	
+	/**
+   * Builds an analyzer with the given stop words and stemming exclusion words
+   * 
+   * @param matchVersion
+   *          lucene compatibility version
+   * @param stopwords
+   *          a stopword set
+   */
+  public CzechAnalyzer(Version matchVersion, ISet<string> stopwords) {
+    this.matchVersion = matchVersion;
+    this.stoptable = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopwords));
+  }
+
+
+	/**
+	 * Builds an analyzer with the given stop words.
+	 * @deprecated use {@link #CzechAnalyzer(Version, Set)} instead
+	 */
+  public CzechAnalyzer(Version matchVersion, params string[] stopwords) 
+  : this(matchVersion, StopFilter.MakeStopSet( stopwords ))
+  {
+    
+	}
+
+  /**
+   * Builds an analyzer with the given stop words.
+   * 
+   * @deprecated use {@link #CzechAnalyzer(Version, Set)} instead
+   */
+  public CzechAnalyzer(Version matchVersion, HashSet<string> stopwords) 
+  : this(matchVersion, (ISet<string>)stopwords)
+  {
+    
+	}
+
+	/**
+	 * Builds an analyzer with the given stop words.
+	 * @deprecated use {@link #CzechAnalyzer(Version, Set)} instead
+	 */
+  public CzechAnalyzer(Version matchVersion, FileInfo stopwords ) 
+      : this(matchVersion, WordlistLoader.GetWordSet( stopwords ))
+  {
+    
+	}
+
+    /**
+     * Loads stopwords hash from resource stream (file, database...).
+     * @param   wordfile    File containing the wordlist
+     * @param   encoding    Encoding used (win-1250, iso-8859-2, ...), null for default system encoding
+     * @deprecated use {@link WordlistLoader#getWordSet(Reader, String) }
+     *             and {@link #CzechAnalyzer(Version, Set)} instead
+     */
+    public void LoadStopWords( Stream wordfile, System.Text.Encoding encoding ) {
+        SetPreviousTokenStream(null); // force a new stopfilter to be created
+        if ( wordfile == null )
+        {
+            stoptable = new HashSet<string>();
+            return;
+        }
+        try {
+            // clear any previous table (if present)
+            stoptable = new HashSet<string>();
+
+            StreamReader isr;
+            if (encoding == null)
+                isr = new StreamReader(wordfile);
+            else
+                isr = new StreamReader(wordfile, encoding);
+
+            stoptable = WordlistLoader.GetWordSet(isr);
+        } catch ( IOException e ) {
+          // clear any previous table (if present)
+          // TODO: throw IOException
+          stoptable = new HashSet<string>();
+        }
+    }
+
+	/**
+	 * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}.
 	 *
-	 * This software consists of voluntary contributions made by many
-	 * individuals on behalf of the Apache Software Foundation.  For more
-	 * information on the Apache Software Foundation, please see
-	 * <http://www.apache.org/>.
-	 */
-
-	/// <summary>
-	/// Analyzer for Czech language. Supports an external list of stopwords (words that
-	/// will not be indexed at all).
-	/// A default set of stopwords is used unless an alternative list is specified, the
-	/// exclusion list is empty by default.
-	/// 
-	/// <author>Lukas Zapletal [lzap@root.cz]</author>
-	/// <version>$Id: CzechAnalyzer.java,v 1.2 2003/01/22 20:54:47 ehatcher Exp $</version>
-	/// </summary>
-	public sealed class CzechAnalyzer : Analyzer 
-	{
-		/// <summary>
-		/// List of typical stopwords.
-		/// </summary>
-		public static String[] STOP_WORDS = 
-				 {
-					 "a","s","k","o","i","u","v","z","dnes","cz","t\u00edmto","bude\u0161","budem",
-					 "byli","jse\u0161","m\u016fj","sv\u00fdm","ta","tomto","tohle","tuto","tyto",
-					 "jej","zda","pro\u010d","m\u00e1te","tato","kam","tohoto","kdo","kte\u0159\u00ed",
-					 "mi","n\u00e1m","tom","tomuto","m\u00edt","nic","proto","kterou","byla",
-					 "toho","proto\u017ee","asi","ho","na\u0161i","napi\u0161te","re","co\u017e","t\u00edm",
-					 "tak\u017ee","sv\u00fdch","jej\u00ed","sv\u00fdmi","jste","aj","tu","tedy","teto",
-					 "bylo","kde","ke","prav\u00e9","ji","nad","nejsou","\u010di","pod","t\u00e9ma",
-					 "mezi","p\u0159es","ty","pak","v\u00e1m","ani","kdy\u017e","v\u0161ak","neg","jsem",
-					 "tento","\u010dl\u00e1nku","\u010dl\u00e1nky","aby","jsme","p\u0159ed","pta","jejich",
-					 "byl","je\u0161t\u011b","a\u017e","bez","tak\u00e9","pouze","prvn\u00ed","va\u0161e","kter\u00e1",
-					 "n\u00e1s","nov\u00fd","tipy","pokud","m\u016f\u017ee","strana","jeho","sv\u00e9","jin\u00e9",
-					 "zpr\u00e1vy","nov\u00e9","nen\u00ed","v\u00e1s","jen","podle","zde","u\u017e","b\u00fdt","v\u00edce",
-					 "bude","ji\u017e","ne\u017e","kter\u00fd","by","kter\u00e9","co","nebo","ten","tak",
-					 "m\u00e1","p\u0159i","od","po","jsou","jak","dal\u0161\u00ed","ale","si","se","ve",
-					 "to","jako","za","zp\u011bt","ze","do","pro","je","na","atd","atp",
-					 "jakmile","p\u0159i\u010dem\u017e","j\u00e1","on","ona","ono","oni","ony","my","vy",
-					 "j\u00ed","ji","m\u011b","mne","jemu","tomu","t\u011bm","t\u011bmu","n\u011bmu","n\u011bmu\u017e",
-					 "jeho\u017e","j\u00ed\u017e","jeliko\u017e","je\u017e","jako\u017e","na\u010de\u017e",
-		};
-
-		/// <summary>
-		/// Contains the stopwords used with the StopFilter.
-		/// </summary>
-		private Hashtable stoptable = new Hashtable();
-
-		/// <summary>
-		/// Builds an analyzer.
-		/// </summary>
-		public CzechAnalyzer() 
-		{
-			stoptable = StopFilter.MakeStopSet( STOP_WORDS );
-		}
-
-		/// <summary>
-		/// Builds an analyzer with the given stop words.
-		/// </summary>
-		public CzechAnalyzer( String[] stopwords ) 
-		{
-			stoptable = StopFilter.MakeStopSet( stopwords );
-		}
-
-		/// <summary>
-		/// Builds an analyzer with the given stop words.
-		/// </summary>
-		public CzechAnalyzer( Hashtable stopwords ) 
-		{
-			stoptable = stopwords;
-		}
-
-		/// <summary>
-		/// Builds an analyzer with the given stop words.
-		/// </summary>
-		public CzechAnalyzer( FileInfo stopwords ) 
-		{
-			stoptable = WordlistLoader.GetWordtable( stopwords );
-		}
-
-		/// <summary>
-		/// Loads stopwords hash from resource stream (file, database...).
-		/// </summary>
-		/// <param name="wordfile">File containing the wordlist</param>
-		/// <param name="encoding">Encoding used (win-1250, iso-8859-2, ...}, null for default system encoding</param>
-		public void LoadStopWords( Stream wordfile, String encoding ) 
-		{
-			if ( wordfile == null ) 
-			{
-				stoptable = new Hashtable();
-				return;
-			}
-			try 
-			{
-				// clear any previous table (if present)
-				stoptable = new Hashtable();
-
-				StreamReader isr;
-				if (encoding == null)
-					isr = new StreamReader(wordfile);
-				else
-					isr = new StreamReader(wordfile, Encoding.GetEncoding(encoding));
-
-				String word;
-				while ( ( word = isr.ReadLine() ) != null ) 
-				{
-					stoptable[word] = word;
-				}
-
-			} 
-			catch ( IOException ) 
-			{
-				stoptable = null;
-			}
-		}
-
-		/// <summary>
-		/// Creates a TokenStream which tokenizes all the text in the provided Reader.
-		/// </summary>
-		/// <returns>
-		/// A TokenStream build from a StandardTokenizer filtered with
-		/// StandardFilter, StopFilter, GermanStemFilter and LowerCaseFilter
-		/// </returns>
-		public override TokenStream TokenStream( String fieldName, TextReader reader ) 
-		{
-			TokenStream result = new StandardTokenizer( reader );
-			result = new StandardFilter( result );
-			result = new LowerCaseFilter( result );
-			result = new StopFilter( result, stoptable );
-			return result;
-		}
+	 * @return  A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
+	 * 			{@link StandardFilter}, {@link LowerCaseFilter}, and {@link StopFilter}
+	 */
+	public override sealed TokenStream TokenStream( String fieldName, TextReader reader ) {
+                TokenStream result = new StandardTokenizer( matchVersion, reader );
+		result = new StandardFilter( result );
+		result = new LowerCaseFilter( result );
+		result = new StopFilter( StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
+                                         result, stoptable );
+		return result;
 	}
+	
+	private class SavedStreams {
+	    protected internal Tokenizer source;
+	    protected internal TokenStream result;
+	};
+	
+	/**
+     * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in 
+     * the provided {@link Reader}.
+     *
+     * @return  A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
+     *          {@link StandardFilter}, {@link LowerCaseFilter}, and {@link StopFilter}
+     */
+	public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
+    {
+      SavedStreams streams = (SavedStreams) GetPreviousTokenStream();
+      if (streams == null) {
+        streams = new SavedStreams();
+        streams.source = new StandardTokenizer(matchVersion, reader);
+        streams.result = new StandardFilter(streams.source);
+        streams.result = new LowerCaseFilter(streams.result);
+        streams.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
+                                        streams.result, stoptable);
+        SetPreviousTokenStream(streams);
+      } else {
+        streams.source.Reset(reader);
+      }
+      return streams.result;
+    }
+}
+
+
 }
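
A minimal usage sketch for the Version-aware CzechAnalyzer constructor introduced above, assuming Version.LUCENE_29 is available; the "content" field name and the sample sentence are illustrative only, and the consumption loop follows the TermAttribute/IncrementToken pattern this commit uses in the stem filters.

    using System;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Cz;
    using Lucene.Net.Analysis.Tokenattributes;
    using Version = Lucene.Net.Util.Version;

    class CzechAnalyzerDemo
    {
        static void Main()
        {
            // Uses the default Czech stopword set held by DefaultSetHolder;
            // pass an ISet<string> to the two-argument constructor to override it.
            Analyzer analyzer = new CzechAnalyzer(Version.LUCENE_29);

            TokenStream stream = analyzer.TokenStream("content", new StringReader("Toto je pouze malý test"));
            TermAttribute termAtt = stream.AddAttribute<TermAttribute>();
            while (stream.IncrementToken())
            {
                Console.WriteLine(termAtt.Term());   // stopwords such as "je" are dropped by the StopFilter
            }
        }
    }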

Modified: incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanAnalyzer.cs?rev=1294875&r1=1294874&r2=1294875&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanAnalyzer.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanAnalyzer.cs Tue Feb 28 22:43:08 2012
@@ -20,26 +20,31 @@
 */
 
 using System;
+using System.Collections.Generic;
 using System.IO;
 using System.Collections;
+using System.Linq;
 using Lucene.Net.Analysis.Standard;
 using Lucene.Net.Analysis;
+using Version = Lucene.Net.Util.Version;
 
 namespace Lucene.Net.Analysis.De
 {
-	/// <summary>
-	/// Analyzer for German language. Supports an external list of stopwords (words that
-	/// will not be indexed at all) and an external list of exclusions (word that will
-	/// not be stemmed, but indexed).
-	/// A default set of stopwords is used unless an alternative list is specified, the
-	/// exclusion list is empty by default.
-	/// </summary>
-	public class GermanAnalyzer : Analyzer
-	{
-		/// <summary>
-		/// List of typical german stopwords.
-		/// </summary>
-		private String[] GERMAN_STOP_WORDS = 
+    /// <summary>
+    /// Analyzer for German language. Supports an external list of stopwords (words that
+    /// will not be indexed at all) and an external list of exclusions (word that will
+    /// not be stemmed, but indexed).
+    /// A default set of stopwords is used unless an alternative list is specified, the
+    /// exclusion list is empty by default.
+    /// </summary>
+    public class GermanAnalyzer : Analyzer
+    {
+        /// <summary>
+        /// List of typical german stopwords.
+        /// </summary>
+        [Obsolete("Use GetDefaultStopSet() instead")]
+        //TODO: make this private in 3.1
+        private static readonly String[] GERMAN_STOP_WORDS = 
 		{
 			"einer", "eine", "eines", "einem", "einen",
 			"der", "die", "das", "dass", "daß",
@@ -55,92 +60,150 @@ namespace Lucene.Net.Analysis.De
 			"durch", "wegen"
 		};
 
-		/// <summary>
-		/// Contains the stopwords used with the StopFilter. 
-		/// </summary>
-		private Hashtable stoptable = new Hashtable();
-
-		/// <summary>
-		/// Contains words that should be indexed but not stemmed. 
-		/// </summary>
-		private Hashtable excltable = new Hashtable();
-
-		/// <summary>
-		/// Builds an analyzer. 
-		/// </summary>
-		public GermanAnalyzer()
-		{
-			stoptable = StopFilter.MakeStopSet( GERMAN_STOP_WORDS );
-		}
-
-		/// <summary>
-		/// Builds an analyzer with the given stop words. 
-		/// </summary>
-		/// <param name="stopwords"></param>
-		public GermanAnalyzer( String[] stopwords )
-		{
-			stoptable = StopFilter.MakeStopSet( stopwords );
-		}
-
-		/// <summary>
-		/// Builds an analyzer with the given stop words. 
-		/// </summary>
-		/// <param name="stopwords"></param>
-		public GermanAnalyzer( Hashtable stopwords )
-		{
-			stoptable = stopwords;
-		}
-
-		/// <summary>
-		/// Builds an analyzer with the given stop words. 
-		/// </summary>
-		/// <param name="stopwords"></param>
-		public GermanAnalyzer( FileInfo stopwords )
-		{
-			stoptable = WordlistLoader.GetWordtable( stopwords );
-		}
-
-		/// <summary>
-		/// Builds an exclusionlist from an array of Strings. 
-		/// </summary>
-		/// <param name="exclusionlist"></param>
-		public void SetStemExclusionTable( String[] exclusionlist )
-		{
-			excltable = StopFilter.MakeStopSet( exclusionlist );
-		}
-
-		/// <summary>
-		/// Builds an exclusionlist from a Hashtable. 
-		/// </summary>
-		/// <param name="exclusionlist"></param>
-		public void SetStemExclusionTable( Hashtable exclusionlist )
-		{
-			excltable = exclusionlist;
-		}
-
-		/// <summary>
-		/// Builds an exclusionlist from the words contained in the given file. 
-		/// </summary>
-		/// <param name="exclusionlist"></param>
-		public void SetStemExclusionTable(FileInfo exclusionlist)
-		{
-			excltable = WordlistLoader.GetWordtable(exclusionlist);
-		}
-
-		/// <summary>
-		/// Creates a TokenStream which tokenizes all the text in the provided TextReader. 
-		/// </summary>
-		/// <param name="fieldName"></param>
-		/// <param name="reader"></param>
-		/// <returns>A TokenStream build from a StandardTokenizer filtered with StandardFilter, StopFilter, GermanStemFilter</returns>
-		public override TokenStream TokenStream(String fieldName, TextReader reader)
-		{
-			TokenStream result = new StandardTokenizer( reader );
-			result = new StandardFilter( result );
-			result = new LowerCaseFilter(result);
-			result = new StopFilter( result, stoptable );
-			result = new GermanStemFilter( result, excltable );
-			return result;
-		}
-	}
+        /// <summary>
+        /// Returns a set of default German-stopwords 
+        /// </summary>
+        public static ISet<string> GetDefaultStopSet()
+        {
+            return DefaultSetHolder.DEFAULT_SET;
+        }
+
+        private static class DefaultSetHolder
+        {
+            internal static readonly ISet<string> DEFAULT_SET = CharArraySet.UnmodifiableSet(new CharArraySet(
+                                                                                                 GERMAN_STOP_WORDS,
+                                                                                                 false));
+        }
+
+        /// <summary>
+        /// Contains the stopwords used with the StopFilter. 
+        /// </summary>
+        //TODO: make this readonly in 3.1
+        private ISet<string> stopSet;
+
+        /// <summary>
+        /// Contains words that should be indexed but not stemmed. 
+        /// </summary>
+        //TODO: make this readonly in 3.1
+        private ISet<string> exclusionSet;
+
+        private Version matchVersion;
+
+        /// <summary>
+        /// Builds an analyzer with the default stop words:
+        /// <see cref="GetDefaultStopSet"/>
+        /// </summary>
+        [Obsolete("Use GermanAnalyzer(Version) instead")]
+        public GermanAnalyzer()
+            : this(Version.LUCENE_23)
+        {
+        }
+
+        /// <summary>
+        /// Builds an analyzer with the default stop words:
+        /// <see cref="GetDefaultStopSet"/>
+        /// </summary>
+        public GermanAnalyzer(Version matchVersion)
+            : this(matchVersion, DefaultSetHolder.DEFAULT_SET)
+        { }
+
+        /// <summary>
+        /// Builds an analyzer with the given stop words. 
+        /// </summary>
+        /// <param name="matchVersion">Lucene compatibility version</param>
+        /// <param name="stopwords">a stopword set</param>
+        public GermanAnalyzer(Version matchVersion, ISet<string> stopwords)
+            : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+        {
+        }
+
+        /// <summary>
+        /// Builds an analyzer with the given stop words
+        /// </summary>
+        /// <param name="matchVersion">lucene compatibility version</param>
+        /// <param name="stopwords">a stopword set</param>
+        /// <param name="stemExclusionSet">a stemming exclusion set</param>
+        public GermanAnalyzer(Version matchVersion, ISet<string> stopwords, ISet<string> stemExclusionSet)
+        {
+            stopSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopwords));
+            exclusionSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stemExclusionSet));
+            SetOverridesTokenStreamMethod<GermanAnalyzer>();
+            this.matchVersion = matchVersion;
+        }
+
+        /// <summary>
+        /// Builds an analyzer with the given stop words. 
+        /// </summary>
+        /// <param name="stopwords"></param>
+        [Obsolete("use GermanAnalyzer(Version, Set) instead")]
+        public GermanAnalyzer(Version matchVersion, params string[] stopwords)
+            : this(matchVersion, StopFilter.MakeStopSet(stopwords))
+        {
+        }
+
+        /// <summary>
+        /// Builds an analyzer with the given stop words.
+        /// </summary>
+        [Obsolete("Use GermanAnalyzer(Version, ISet)")]
+        public GermanAnalyzer(Version matchVersion, IDictionary<string, string> stopwords)
+            : this(matchVersion, stopwords.Keys.ToArray())
+        {
+
+        }
+
+        /// <summary>
+        /// Builds an analyzer with the given stop words. 
+        /// </summary>
+        [Obsolete("Use GermanAnalyzer(Version, ISet)")]
+        public GermanAnalyzer(Version matchVersion, FileInfo stopwords)
+            : this(matchVersion, WordlistLoader.GetWordSet(stopwords))
+        {
+        }
+
+        /// <summary>
+        /// Builds an exclusionlist from an array of Strings. 
+        /// </summary>
+        [Obsolete("Use GermanAnalyzer(Version, ISet, ISet) instead")]
+        public void SetStemExclusionTable(String[] exclusionlist)
+        {
+            exclusionSet = StopFilter.MakeStopSet(exclusionlist);
+            SetPreviousTokenStream(null);
+        }
+
+        /// <summary>
+        /// Builds an exclusionlist from a IDictionary. 
+        /// </summary>
+        [Obsolete("Use GermanAnalyzer(Version, ISet, ISet) instead")]
+        public void SetStemExclusionTable(IDictionary<string, string> exclusionlist)
+        {
+            exclusionSet = new HashSet<string>(exclusionlist.Keys);
+            SetPreviousTokenStream(null);
+        }
+
+        /// <summary>
+        /// Builds an exclusionlist from the words contained in the given file. 
+        /// </summary>
+        [Obsolete("Use GermanAnalyzer(Version, ISet, ISet) instead")]
+        public void SetStemExclusionTable(FileInfo exclusionlist)
+        {
+            exclusionSet = WordlistLoader.GetWordSet(exclusionlist);
+            SetPreviousTokenStream(null);
+        }
+
+        /// <summary>
+        /// Creates a TokenStream which tokenizes all the text in the provided TextReader. 
+        /// </summary>
+        /// <param name="fieldName"></param>
+        /// <param name="reader"></param>
+        /// <returns>A TokenStream build from a StandardTokenizer filtered with StandardFilter, StopFilter, GermanStemFilter</returns>
+        public override TokenStream TokenStream(String fieldName, TextReader reader)
+        {
+            TokenStream result = new StandardTokenizer(matchVersion, reader);
+            result = new StandardFilter(result);
+            result = new LowerCaseFilter(result);
+            result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet);
+            result = new GermanStemFilter(result, exclusionSet);
+            return result;
+        }
+    }
 }
\ No newline at end of file
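
A sketch of the three-argument GermanAnalyzer constructor added in this change, combining the default stopword set from GetDefaultStopSet() with a stem exclusion set; Version.LUCENE_29, the "body" field name, and the exclusion word "autos" are illustrative assumptions.

    using System;
    using System.Collections.Generic;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.De;
    using Lucene.Net.Analysis.Tokenattributes;
    using Version = Lucene.Net.Util.Version;

    class GermanAnalyzerDemo
    {
        static void Main()
        {
            // Default stopwords plus an exclusion set: "autos" is indexed but never stemmed.
            ISet<string> stopwords = GermanAnalyzer.GetDefaultStopSet();
            ISet<string> exclusions = new HashSet<string> { "autos" };
            Analyzer analyzer = new GermanAnalyzer(Version.LUCENE_29, stopwords, exclusions);

            TokenStream stream = analyzer.TokenStream("body", new StringReader("Die Autos fahren auf der Autobahn"));
            TermAttribute termAtt = stream.AddAttribute<TermAttribute>();
            while (stream.IncrementToken())
            {
                Console.WriteLine(termAtt.Term());
            }
        }
    }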

Modified: incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanStemFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanStemFilter.cs?rev=1294875&r1=1294874&r2=1294875&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanStemFilter.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanStemFilter.cs Tue Feb 28 22:43:08 2012
@@ -20,87 +20,89 @@
 */
 
 using System;
+using System.Collections.Generic;
 using System.IO;
 using System.Collections;
+using Lucene.Net.Analysis.Tokenattributes;
 
 namespace Lucene.Net.Analysis.De
 {
-	/// <summary>
-	/// A filter that stems German words. It supports a table of words that should
-	/// not be stemmed at all. The stemmer used can be changed at runtime after the
-	/// filter object is created (as long as it is a GermanStemmer).
-	/// </summary>
-	public sealed class GermanStemFilter : TokenFilter
-	{
-		/// <summary>
-		/// The actual token in the input stream.
-		/// </summary>
-		private Token token = null;
-		private GermanStemmer stemmer = null;
-		private Hashtable exclusions = null;
-    
-		public GermanStemFilter( TokenStream _in ) : base(_in)
-		{
-			stemmer = new GermanStemmer();
-		}
-    
-		/// <summary>
-		/// Builds a GermanStemFilter that uses an exclusiontable. 
-		/// </summary>
-		/// <param name="_in"></param>
-		/// <param name="exclusiontable"></param>
-		public GermanStemFilter( TokenStream _in, Hashtable exclusiontable ): this(_in)
-		{
-			exclusions = exclusiontable;
-		}
-    
-		/// <summary>
-		/// </summary>
-		/// <returns>Returns the next token in the stream, or null at EOS</returns>
-		public override Token Next()
-	
-		{
-			if ( ( token = input.Next() ) == null ) 
-			{
-				return null;
-			}
-				// Check the exclusiontable
-			else if ( exclusions != null && exclusions.Contains( token.TermText() ) ) 
-			{
-				return token;
-			}
-			else 
-			{
-				String s = stemmer.Stem( token.TermText() );
-				// If not stemmed, dont waste the time creating a new token
-				if ( !s.Equals( token.TermText() ) ) 
-				{
-					return new Token( s, token.StartOffset(),
-						token.EndOffset(), token.Type() );
-				}
-				return token;
-			}
-		}
-
-		/// <summary>
-		/// Set a alternative/custom GermanStemmer for this filter. 
-		/// </summary>
-		/// <param name="stemmer"></param>
-		public void SetStemmer( GermanStemmer stemmer )
-		{
-			if ( stemmer != null ) 
-			{
-				this.stemmer = stemmer;
-			}
-		}
-
-		/// <summary>
-		/// Set an alternative exclusion list for this filter. 
-		/// </summary>
-		/// <param name="exclusiontable"></param>
-		public void SetExclusionTable( Hashtable exclusiontable )
-		{
-			exclusions = exclusiontable;
-		}
-	}
+    /// <summary>
+    /// A filter that stems German words. It supports a table of words that should
+    /// not be stemmed at all. The stemmer used can be changed at runtime after the
+    /// filter object is created (as long as it is a GermanStemmer).
+    /// </summary>
+    public sealed class GermanStemFilter : TokenFilter
+    {
+        /// <summary>
+        /// The actual token in the input stream.
+        /// </summary>
+        private GermanStemmer stemmer = null;
+        private ISet<string> exclusionSet = null;
+
+        private TermAttribute termAtt;
+
+        public GermanStemFilter(TokenStream _in)
+            : base(_in)
+        {
+            stemmer = new GermanStemmer();
+            termAtt = AddAttribute<TermAttribute>();
+        }
+
+        /// <summary>
+        /// Builds a GermanStemFilter that uses an exclusiontable. 
+        /// </summary>
+        /// <param name="_in"></param>
+        /// <param name="exclusiontable"></param>
+        public GermanStemFilter(TokenStream _in, ISet<string> exclusiontable)
+            : this(_in)
+        {
+            exclusionSet = exclusiontable;
+        }
+
+        /// <returns>
+        /// Returns true for next token in the stream, or false at EOS
+        /// </returns>
+        public override bool IncrementToken()
+        {
+            if (input.IncrementToken())
+            {
+                String term = termAtt.Term();
+                // Check the exclusion table.
+                if (exclusionSet == null || !exclusionSet.Contains(term))
+                {
+                    String s = stemmer.Stem(term);
+                    // If not stemmed, don't waste the time adjusting the token.
+                    if ((s != null) && !s.Equals(term))
+                        termAtt.SetTermBuffer(s);
+                }
+                return true;
+            }
+            else
+            {
+                return false;
+            }
+        }
+
+        /// <summary>
+        /// Set a alternative/custom GermanStemmer for this filter. 
+        /// </summary>
+        /// <param name="stemmer"></param>
+        public void SetStemmer(GermanStemmer stemmer)
+        {
+            if (stemmer != null)
+            {
+                this.stemmer = stemmer;
+            }
+        }
+
+        /// <summary>
+        /// Set an alternative exclusion list for this filter. 
+        /// </summary>
+        /// <param name="exclusiontable"></param>
+        public void SetExclusionTable(ISet<string> exclusiontable)
+        {
+            exclusionSet = exclusiontable;
+        }
+    }
 }
\ No newline at end of file
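
GermanStemFilter now takes an ISet<string> exclusion table and exposes its output through IncrementToken() rather than Next(). A hand-built chain using it directly might look like the following sketch; WhitespaceTokenizer and LowerCaseFilter come from the core analysis namespace, and the sample words and the exclusion "gelesen" are made up for illustration.

    using System;
    using System.Collections.Generic;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.De;
    using Lucene.Net.Analysis.Tokenattributes;

    class GermanStemFilterDemo
    {
        static void Main()
        {
            // Tokenize on whitespace, lower-case, then stem; "gelesen" is left unstemmed.
            ISet<string> exclusions = new HashSet<string> { "gelesen" };

            TokenStream stream = new WhitespaceTokenizer(new StringReader("Bücher wurden gelesen"));
            stream = new LowerCaseFilter(stream);
            stream = new GermanStemFilter(stream, exclusions);

            // Filters share one attribute source, so the term attribute can be read from the outer stream.
            TermAttribute termAtt = stream.AddAttribute<TermAttribute>();
            while (stream.IncrementToken())
            {
                Console.WriteLine(termAtt.Term());
            }
        }
    }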

Modified: incubator/lucene.net/trunk/src/contrib/Analyzers/Fr/FrenchAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Fr/FrenchAnalyzer.cs?rev=1294875&r1=1294874&r2=1294875&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Fr/FrenchAnalyzer.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Fr/FrenchAnalyzer.cs Tue Feb 28 22:43:08 2012
@@ -20,6 +20,7 @@
 */
 
 using System;
+using System.Collections.Generic;
 using System.IO;
 using System.Text;
 using System.Collections;
@@ -27,192 +28,235 @@ using System.Collections;
 using Lucene.Net.Analysis;
 using Lucene.Net.Analysis.De;
 using Lucene.Net.Analysis.Standard;
+using Version = Lucene.Net.Util.Version;
 
 namespace Lucene.Net.Analysis.Fr
 {
-	/* ====================================================================
-	 * The Apache Software License, Version 1.1
-	 *
-	 * Copyright (c) 2004 The Apache Software Foundation.  All rights
-	 * reserved.
-	 *
-	 * Redistribution and use in source and binary forms, with or without
-	 * modification, are permitted provided that the following conditions
-	 * are met:
-	 *
-	 * 1. Redistributions of source code must retain the above copyright
-	 *    notice, this list of conditions and the following disclaimer.
-	 *
-	 * 2. Redistributions in binary form must reproduce the above copyright
-	 *    notice, this list of conditions and the following disclaimer in
-	 *    the documentation and/or other materials provided with the
-	 *    distribution.
-	 *
-	 * 3. The end-user documentation included with the redistribution,
-	 *    if any, must include the following acknowledgment:
-	 *       "This product includes software developed by the
-	 *        Apache Software Foundation (http://www.apache.org/)."
-	 *    Alternately, this acknowledgment may appear in the software itself,
-	 *    if and wherever such third-party acknowledgments normally appear.
-	 *
-	 * 4. The names "Apache" and "Apache Software Foundation" and
-	 *    "Apache Lucene" must not be used to endorse or promote products
-	 *    derived from this software without prior written permission. For
-	 *    written permission, please contact apache@apache.org.
-	 *
-	 * 5. Products derived from this software may not be called "Apache",
-	 *    "Apache Lucene", nor may "Apache" appear in their name, without
-	 *    prior written permission of the Apache Software Foundation.
-	 *
-	 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
-	 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-	 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-	 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
-	 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-	 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-	 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-	 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-	 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-	 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-	 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-	 * SUCH DAMAGE.
-	 * ====================================================================
-	 *
-	 * This software consists of voluntary contributions made by many
-	 * individuals on behalf of the Apache Software Foundation.  For more
-	 * information on the Apache Software Foundation, please see
-	 * <http://www.apache.org/>.
-	 */
-
-	/// <summary>
-	/// Analyzer for french language. Supports an external list of stopwords (words that
-	/// will not be indexed at all) and an external list of exclusions (word that will
-	/// not be stemmed, but indexed).
-	/// A default set of stopwords is used unless an other list is specified, the
-	/// exclusionlist is empty by default.
-	/// 
-	/// <author>Patrick Talbot (based on Gerhard Schwarz work for German)</author>
-	/// <version>$Id: FrenchAnalyzer.java,v 1.9 2004/10/17 11:41:40 dnaber Exp $</version>
-	/// </summary>
-	public sealed class FrenchAnalyzer : Analyzer 
-	{
-
-		/// <summary>
-		/// Extended list of typical french stopwords.
-		/// </summary>
-		public static String[] FRENCH_STOP_WORDS = 
-				 {
-					 "a", "afin", "ai", "ainsi", "après", "attendu", "au", "aujourd", "auquel", "aussi",
-					 "autre", "autres", "aux", "auxquelles", "auxquels", "avait", "avant", "avec", "avoir",
-					 "c", "car", "ce", "ceci", "cela", "celle", "celles", "celui", "cependant", "certain",
-					 "certaine", "certaines", "certains", "ces", "cet", "cette", "ceux", "chez", "ci",
-					 "combien", "comme", "comment", "concernant", "contre", "d", "dans", "de", "debout",
-					 "dedans", "dehors", "delà", "depuis", "derrière", "des", "désormais", "desquelles",
-					 "desquels", "dessous", "dessus", "devant", "devers", "devra", "divers", "diverse",
-					 "diverses", "doit", "donc", "dont", "du", "duquel", "durant", "dès", "elle", "elles",
-					 "en", "entre", "environ", "est", "et", "etc", "etre", "eu", "eux", "excepté", "hormis",
-					 "hors", "hélas", "hui", "il", "ils", "j", "je", "jusqu", "jusque", "l", "la", "laquelle",
-					 "le", "lequel", "les", "lesquelles", "lesquels", "leur", "leurs", "lorsque", "lui", "là",
-					 "ma", "mais", "malgré", "me", "merci", "mes", "mien", "mienne", "miennes", "miens", "moi",
-					 "moins", "mon", "moyennant", "même", "mêmes", "n", "ne", "ni", "non", "nos", "notre",
-					 "nous", "néanmoins", "nôtre", "nôtres", "on", "ont", "ou", "outre", "où", "par", "parmi",
-					 "partant", "pas", "passé", "pendant", "plein", "plus", "plusieurs", "pour", "pourquoi",
-					 "proche", "près", "puisque", "qu", "quand", "que", "quel", "quelle", "quelles", "quels",
-					 "qui", "quoi", "quoique", "revoici", "revoilà", "s", "sa", "sans", "sauf", "se", "selon",
-					 "seront", "ses", "si", "sien", "sienne", "siennes", "siens", "sinon", "soi", "soit",
-					 "son", "sont", "sous", "suivant", "sur", "ta", "te", "tes", "tien", "tienne", "tiennes",
-					 "tiens", "toi", "ton", "tous", "tout", "toute", "toutes", "tu", "un", "une", "va", "vers",
-					 "voici", "voilà", "vos", "votre", "vous", "vu", "vôtre", "vôtres", "y", "à", "ça", "ès",
-					 "été", "être", "ô"
-				 };
-
-		/// <summary>
-		/// Contains the stopwords used with the StopFilter.
-		/// </summary>
-		private Hashtable stoptable = new Hashtable();
-
-		/// <summary>
-		/// Contains words that should be indexed but not stemmed.
-		/// </summary>
-		private Hashtable excltable = new Hashtable();
-
-		/// <summary>
-		/// Builds an analyzer.
-		/// </summary>
-		public FrenchAnalyzer() 
-		{
-			stoptable = StopFilter.MakeStopSet( FRENCH_STOP_WORDS );
-		}
-
-		/// <summary>
-		/// Builds an analyzer with the given stop words.
-		/// </summary>
-		public FrenchAnalyzer( String[] stopwords ) 
-		{
-			stoptable = StopFilter.MakeStopSet( stopwords );
-		}
-
-		/// <summary>
-		/// Builds an analyzer with the given stop words.
-		/// </summary>
-		public FrenchAnalyzer( Hashtable stopwords ) 
-		{
-			stoptable = stopwords;
-		}
-
-		/// <summary>
-		/// Builds an analyzer with the given stop words.
-		/// </summary>
-		public FrenchAnalyzer( FileInfo stopwords ) 
-		{
-			stoptable = WordlistLoader.GetWordtable( stopwords );
-		}
-
-		/// <summary>
-		/// Builds an exclusionlist from an array of Strings.
-		/// </summary>
-		public void SetStemExclusionTable( String[] exclusionlist ) 
-		{
-			excltable = StopFilter.MakeStopSet( exclusionlist );
-		}
-
-		/// <summary>
-		/// Builds an exclusionlist from a Hashtable.
-		/// </summary>
-		public void SetStemExclusionTable( Hashtable exclusionlist ) 
-		{
-			excltable = exclusionlist;
-		}
-
-		/// <summary>
-		/// Builds an exclusionlist from the words contained in the given file.
-		/// </summary>
-		public void SetStemExclusionTable( FileInfo exclusionlist ) 
-		{
-			excltable = WordlistLoader.GetWordtable( exclusionlist );
-		}
-
-		/// <summary>
-		/// Creates a TokenStream which tokenizes all the text in the provided Reader.
-		/// </summary>
-		/// <returns>
-		/// A TokenStream build from a StandardTokenizer filtered with
-		/// 	StandardFilter, StopFilter, FrenchStemFilter and LowerCaseFilter
-		/// </returns>
-		public override TokenStream TokenStream( String fieldName, TextReader reader ) 
-		{
-		
-			if (fieldName==null) throw new ArgumentException("fieldName must not be null");
-			if (reader==null) throw new ArgumentException("readermust not be null");
-				
-			TokenStream result = new StandardTokenizer( reader );
-			result = new StandardFilter( result );
-			result = new StopFilter( result, stoptable );
-			result = new FrenchStemFilter( result, excltable );
-			// Convert to lowercase after stemming!
-			result = new LowerCaseFilter( result );
-			return result;
-		}
-	}
-
-}
+    /**
+ * {@link Analyzer} for French language. 
+ * <p>
+ * Supports an external list of stopwords (words that
+ * will not be indexed at all) and an external list of exclusions (word that will
+ * not be stemmed, but indexed).
+ * A default set of stopwords is used unless an alternative list is specified, but the
+ * exclusion list is empty by default.
+ * </p>
+ *
+ * <a name="version"/>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating FrenchAnalyzer:
+ * <ul>
+ *   <li> As of 2.9, StopFilter preserves position
+ *        increments
+ * </ul>
+ *
+ * <p><b>NOTE</b>: This class uses the same {@link Version}
+ * dependent settings as {@link StandardAnalyzer}.</p>
+ */
+    public sealed class FrenchAnalyzer : Analyzer
+    {
+
+        /**
+         * Extended list of typical French stopwords.
+         * @deprecated use {@link #getDefaultStopSet()} instead
+         */
+        // TODO make this private in 3.1
+        public readonly static String[] FRENCH_STOP_WORDS = {
+    "a", "afin", "ai", "ainsi", "après", "attendu", "au", "aujourd", "auquel", "aussi",
+    "autre", "autres", "aux", "auxquelles", "auxquels", "avait", "avant", "avec", "avoir",
+    "c", "car", "ce", "ceci", "cela", "celle", "celles", "celui", "cependant", "certain",
+    "certaine", "certaines", "certains", "ces", "cet", "cette", "ceux", "chez", "ci",
+    "combien", "comme", "comment", "concernant", "contre", "d", "dans", "de", "debout",
+    "dedans", "dehors", "delà", "depuis", "derrière", "des", "désormais", "desquelles",
+    "desquels", "dessous", "dessus", "devant", "devers", "devra", "divers", "diverse",
+    "diverses", "doit", "donc", "dont", "du", "duquel", "durant", "dès", "elle", "elles",
+    "en", "entre", "environ", "est", "et", "etc", "etre", "eu", "eux", "excepté", "hormis",
+    "hors", "hélas", "hui", "il", "ils", "j", "je", "jusqu", "jusque", "l", "la", "laquelle",
+    "le", "lequel", "les", "lesquelles", "lesquels", "leur", "leurs", "lorsque", "lui", "là",
+    "ma", "mais", "malgré", "me", "merci", "mes", "mien", "mienne", "miennes", "miens", "moi",
+    "moins", "mon", "moyennant", "même", "mêmes", "n", "ne", "ni", "non", "nos", "notre",
+    "nous", "néanmoins", "nôtre", "nôtres", "on", "ont", "ou", "outre", "où", "par", "parmi",
+    "partant", "pas", "passé", "pendant", "plein", "plus", "plusieurs", "pour", "pourquoi",
+    "proche", "près", "puisque", "qu", "quand", "que", "quel", "quelle", "quelles", "quels",
+    "qui", "quoi", "quoique", "revoici", "revoilà", "s", "sa", "sans", "sauf", "se", "selon",
+    "seront", "ses", "si", "sien", "sienne", "siennes", "siens", "sinon", "soi", "soit",
+    "son", "sont", "sous", "suivant", "sur", "ta", "te", "tes", "tien", "tienne", "tiennes",
+    "tiens", "toi", "ton", "tous", "tout", "toute", "toutes", "tu", "un", "une", "va", "vers",
+    "voici", "voilà", "vos", "votre", "vous", "vu", "vôtre", "vôtres", "y", "à", "ça", "ès",
+    "été", "être", "ô"
+  };
+
+        /**
+         * Contains the stopwords used with the {@link StopFilter}.
+         */
+        private readonly ISet<string> stoptable;
+        /**
+         * Contains words that should be indexed but not stemmed.
+         */
+        //TODO make this final in 3.0
+        private ISet<string> excltable = new HashSet<string>();
+
+        private readonly Version matchVersion;
+
+        /**
+         * Returns an unmodifiable instance of the default stop-words set.
+         * @return an unmodifiable instance of the default stop-words set.
+         */
+        public static ISet<string> GetDefaultStopSet()
+        {
+            return DefaultSetHolder.DEFAULT_STOP_SET;
+        }
+
+        static class DefaultSetHolder
+        {
+            internal static ISet<string> DEFAULT_STOP_SET = CharArraySet.UnmodifiableSet(new CharArraySet(FRENCH_STOP_WORDS, false));
+        }
+
+        /**
+         * Builds an analyzer with the default stop words ({@link #FRENCH_STOP_WORDS}).
+         */
+        public FrenchAnalyzer(Version matchVersion)
+            : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+        {
+
+        }
+
+        /**
+         * Builds an analyzer with the given stop words
+         * 
+         * @param matchVersion
+         *          lucene compatibility version
+         * @param stopwords
+         *          a stopword set
+         */
+        public FrenchAnalyzer(Version matchVersion, ISet<string> stopwords)
+            : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+        {
+        }
+
+        /**
+         * Builds an analyzer with the given stop words
+         * 
+         * @param matchVersion
+         *          lucene compatibility version
+         * @param stopwords
+         *          a stopword set
+         * @param stemExclutionSet
+         *          a stemming exclusion set
+         */
+        public FrenchAnalyzer(Version matchVersion, ISet<string> stopwords, ISet<string> stemExclutionSet)
+        {
+            this.matchVersion = matchVersion;
+            this.stoptable = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopwords));
+            this.excltable = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stemExclutionSet));
+        }
+
+
+        /**
+         * Builds an analyzer with the given stop words.
+         * @deprecated use {@link #FrenchAnalyzer(Version, Set)} instead
+         */
+        public FrenchAnalyzer(Version matchVersion, params string[] stopwords)
+            : this(matchVersion, StopFilter.MakeStopSet(stopwords))
+        {
+
+        }
+
+        /**
+         * Builds an analyzer with the given stop words.
+         * @throws IOException
+         * @deprecated use {@link #FrenchAnalyzer(Version, Set)} instead
+         */
+        public FrenchAnalyzer(Version matchVersion, FileInfo stopwords)
+            : this(matchVersion, WordlistLoader.GetWordSet(stopwords))
+        {
+        }
+
+        /**
+         * Builds an exclusionlist from an array of Strings.
+         * @deprecated use {@link #FrenchAnalyzer(Version, Set, Set)} instead
+         */
+        public void SetStemExclusionTable(params string[] exclusionlist)
+        {
+            excltable = StopFilter.MakeStopSet(exclusionlist);
+            SetPreviousTokenStream(null); // force a new stemmer to be created
+        }
+
+        /**
+         * Builds an exclusionlist from a Map.
+         * @deprecated use {@link #FrenchAnalyzer(Version, Set, Set)} instead
+         */
+        public void SetStemExclusionTable(IDictionary<string, string> exclusionlist)
+        {
+            excltable = new HashSet<string>(exclusionlist.Keys);
+            SetPreviousTokenStream(null); // force a new stemmer to be created
+        }
+
+        /**
+         * Builds an exclusionlist from the words contained in the given file.
+         * @throws IOException
+         * @deprecated use {@link #FrenchAnalyzer(Version, Set, Set)} instead
+         */
+        public void SetStemExclusionTable(FileInfo exclusionlist)
+        {
+            excltable = new HashSet<string>(WordlistLoader.GetWordSet(exclusionlist));
+            SetPreviousTokenStream(null); // force a new stemmer to be created
+        }
+
+        /**
+         * Creates a {@link TokenStream} which tokenizes all the text in the provided
+         * {@link Reader}.
+         *
+         * @return A {@link TokenStream} built from a {@link StandardTokenizer} 
+         *         filtered with {@link StandardFilter}, {@link StopFilter}, 
+         *         {@link FrenchStemFilter} and {@link LowerCaseFilter}
+         */
+        public override sealed TokenStream TokenStream(String fieldName, TextReader reader)
+        {
+            TokenStream result = new StandardTokenizer(matchVersion, reader);
+            result = new StandardFilter(result);
+            result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
+                                    result, stoptable);
+            result = new FrenchStemFilter(result, excltable);
+            // Convert to lowercase after stemming!
+            result = new LowerCaseFilter(result);
+            return result;
+        }
+
+        class SavedStreams
+        {
+            protected internal Tokenizer source;
+            protected internal TokenStream result;
+        };
+
+        /**
+         * Returns a (possibly reused) {@link TokenStream} which tokenizes all the 
+         * text in the provided {@link Reader}.
+         *
+         * @return A {@link TokenStream} built from a {@link StandardTokenizer} 
+         *         filtered with {@link StandardFilter}, {@link StopFilter}, 
+         *         {@link FrenchStemFilter} and {@link LowerCaseFilter}
+         */
+        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
+        {
+            SavedStreams streams = (SavedStreams)GetPreviousTokenStream();
+            if (streams == null)
+            {
+                streams = new SavedStreams();
+                streams.source = new StandardTokenizer(matchVersion, reader);
+                streams.result = new StandardFilter(streams.source);
+                streams.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
+                                                streams.result, stoptable);
+                streams.result = new FrenchStemFilter(streams.result, excltable);
+                // Convert to lowercase after stemming!
+                streams.result = new LowerCaseFilter(streams.result);
+                SetPreviousTokenStream(streams);
+            }
+            else
+            {
+                streams.source.Reset(reader);
+            }
+            return streams.result;
+        }
+    }
+}
\ No newline at end of file
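
The commit also adds ReusableTokenStream backed by a SavedStreams holder, so the tokenizer/filter chain is built once and reset for each new reader instead of being reallocated per document. A rough consumption sketch, assuming Version.LUCENE_29 and an illustrative "content" field:

    using System;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Fr;
    using Lucene.Net.Analysis.Tokenattributes;
    using Version = Lucene.Net.Util.Version;

    class FrenchAnalyzerDemo
    {
        static void Main()
        {
            Analyzer analyzer = new FrenchAnalyzer(Version.LUCENE_29);

            string[] docs = { "les chats noirs", "une chatte noire" };
            foreach (string doc in docs)
            {
                // The second call reuses the saved StandardTokenizer/StopFilter/FrenchStemFilter chain.
                TokenStream stream = analyzer.ReusableTokenStream("content", new StringReader(doc));
                TermAttribute termAtt = stream.AddAttribute<TermAttribute>();
                while (stream.IncrementToken())
                {
                    Console.WriteLine(termAtt.Term());
                }
            }
        }
    }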

Modified: incubator/lucene.net/trunk/src/contrib/Analyzers/Fr/FrenchStemFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Fr/FrenchStemFilter.cs?rev=1294875&r1=1294874&r2=1294875&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Fr/FrenchStemFilter.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Fr/FrenchStemFilter.cs Tue Feb 28 22:43:08 2012
@@ -20,145 +20,94 @@
 */
 
 using System;
+using System.Collections.Generic;
 using System.IO;
 using System.Text;
 using System.Collections;
 
 using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
 
 namespace Lucene.Net.Analysis.Fr
 {
-	/* ====================================================================
-	 * The Apache Software License, Version 1.1
-	 *
-	 * Copyright (c) 2004 The Apache Software Foundation.  All rights
-	 * reserved.
-	 *
-	 * Redistribution and use in source and binary forms, with or without
-	 * modification, are permitted provided that the following conditions
-	 * are met:
-	 *
-	 * 1. Redistributions of source code must retain the above copyright
-	 *    notice, this list of conditions and the following disclaimer.
-	 *
-	 * 2. Redistributions in binary form must reproduce the above copyright
-	 *    notice, this list of conditions and the following disclaimer in
-	 *    the documentation and/or other materials provided with the
-	 *    distribution.
-	 *
-	 * 3. The end-user documentation included with the redistribution,
-	 *    if any, must include the following acknowledgment:
-	 *       "This product includes software developed by the
-	 *        Apache Software Foundation (http://www.apache.org/)."
-	 *    Alternately, this acknowledgment may appear in the software itself,
-	 *    if and wherever such third-party acknowledgments normally appear.
-	 *
-	 * 4. The names "Apache" and "Apache Software Foundation" and
-	 *    "Apache Lucene" must not be used to endorse or promote products
-	 *    derived from this software without prior written permission. For
-	 *    written permission, please contact apache@apache.org.
-	 *
-	 * 5. Products derived from this software may not be called "Apache",
-	 *    "Apache Lucene", nor may "Apache" appear in their name, without
-	 *    prior written permission of the Apache Software Foundation.
-	 *
-	 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
-	 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-	 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-	 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
-	 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-	 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-	 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-	 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-	 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-	 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-	 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-	 * SUCH DAMAGE.
-	 * ====================================================================
-	 *
-	 * This software consists of voluntary contributions made by many
-	 * individuals on behalf of the Apache Software Foundation.  For more
-	 * information on the Apache Software Foundation, please see
-	 * <http://www.apache.org/>.
-	 */
-
-	/// <summary>
-	/// A filter that stemms french words. It supports a table of words that should
-	/// not be stemmed at all. The used stemmer can be changed at runtime after the
-	/// filter object is created (as long as it is a FrenchStemmer).
-	/// 
-	/// <author>Patrick Talbot (based on Gerhard Schwarz work for German)</author>
-	/// <version>$Id: FrenchAnalyzer.java,v 1.2 2004/01/23 20:54:47 ehatcher Exp $</version>
-	/// </summary>
-	public sealed class FrenchStemFilter : TokenFilter 
-	{
-
-		/// <summary>
-		/// The actual token in the input stream.
-		/// </summary>
-		private Token token = null;
-		private FrenchStemmer stemmer = null;
-		private Hashtable exclusions = null;
-
-		public FrenchStemFilter( TokenStream _in ) : base(_in)
-		{
-			stemmer = new FrenchStemmer();
-		}
-
-		/// <summary>
-		/// Builds a FrenchStemFilter that uses an exclusiontable.
-		/// </summary>
-		public FrenchStemFilter( TokenStream _in, Hashtable exclusiontable ) : 	this( _in )
-		{
-			exclusions = exclusiontable;
-		}
-
-		/// <summary>
-		/// Returns the next token in the stream, or null at EOS
-		/// </summary>
-		/// <returns>
-		/// Returns the next token in the stream, or null at EOS
-		/// </returns>
-		public override Token Next()
-		{
-			if ( ( token = input.Next() ) == null ) 
-			{
-				return null;
-			}
-				// Check the exclusiontable
-			else if ( exclusions != null && exclusions.Contains( token.TermText() ) ) 
-			{
-				return token;
-			}
-			else 
-			{
-				String s = stemmer.Stem( token.TermText() );
-				// If not stemmed, dont waste the time creating a new token
-				if ( !s.Equals( token.TermText() ) ) 
-				{
-					return new Token( s, 0, s.Length, token.Type() );
-				}
-				return token;
-			}
-		}
-
-		/// <summary>
-		/// Set a alternative/custom FrenchStemmer for this filter.
-		/// </summary>
-		public void SetStemmer( FrenchStemmer stemmer ) 
-		{
-			if ( stemmer != null ) 
-			{
-				this.stemmer = stemmer;
-			}
-		}
-
-		/// <summary>
-		/// Set an alternative exclusion list for this filter.
-		/// </summary>
-		public void SetExclusionTable( Hashtable exclusiontable ) 
-		{
-			exclusions = exclusiontable;
-		}
-	}
+    /**
+ * A {@link TokenFilter} that stems french words. 
+ * <p>
+ * It supports a table of words that should
+ * not be stemmed at all. The used stemmer can be changed at runtime after the
+ * filter object is created (as long as it is a {@link FrenchStemmer}).
+ * </p>
+ * NOTE: This stemmer does not implement the Snowball algorithm correctly,
+ * especially involving case problems. It is recommended that you consider using
+ * the "French" stemmer in the snowball package instead. This stemmer will likely
+ * be deprecated in a future release.
+ */
+    public sealed class FrenchStemFilter : TokenFilter
+    {
+
+        /**
+         * The actual token in the input stream.
+         */
+        private FrenchStemmer stemmer = null;
+        private ISet<string> exclusions = null;
+
+        private TermAttribute termAtt;
+
+        public FrenchStemFilter(TokenStream _in)
+            : base(_in)
+        {
+
+            stemmer = new FrenchStemmer();
+            termAtt = AddAttribute<TermAttribute>();
+        }
+
+
+        public FrenchStemFilter(TokenStream _in, ISet<string> exclusiontable)
+            : this(_in)
+        {
+            exclusions = exclusiontable;
+        }
+
+        /**
+         * @return  Returns true for the next token in the stream, or false at EOS
+         */
+        public override bool IncrementToken()
+        {
+            if (input.IncrementToken())
+            {
+                String term = termAtt.Term();
+
+                // Check the exclusion table
+                if (exclusions == null || !exclusions.Contains(term))
+                {
+                    String s = stemmer.Stem(term);
+                    // If not stemmed, don't waste the time  adjusting the token.
+                    if ((s != null) && !s.Equals(term))
+                        termAtt.SetTermBuffer(s);
+                }
+                return true;
+            }
+            else
+            {
+                return false;
+            }
+        }
+        /**
+         * Set a alternative/custom {@link FrenchStemmer} for this filter.
+         */
+        public void SetStemmer(FrenchStemmer stemmer)
+        {
+            if (stemmer != null)
+            {
+                this.stemmer = stemmer;
+            }
+        }
+        /**
+         * Set an alternative exclusion list for this filter.
+         */
+        public void SetExclusionTable(IDictionary<string, string> exclusiontable)
+        {
+            exclusions = new HashSet<string>(exclusiontable.Keys);
+        }
+    }
 }
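
FrenchStemFilter, like GermanStemFilter above, has moved from returning Token objects out of Next() to mutating a shared TermAttribute inside IncrementToken(). A minimal, hypothetical filter written against that same pattern shows the shape of the migration:

    using System;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Tokenattributes;

    // Illustrative only: upper-cases each term in place instead of allocating new Token objects.
    public sealed class UpperCaseDemoFilter : TokenFilter
    {
        private readonly TermAttribute termAtt;

        public UpperCaseDemoFilter(TokenStream input)
            : base(input)
        {
            termAtt = AddAttribute<TermAttribute>();
        }

        public override bool IncrementToken()
        {
            if (!input.IncrementToken())
                return false;                      // end of stream

            // Rewrite the shared term buffer; downstream consumers see the modified term.
            termAtt.SetTermBuffer(termAtt.Term().ToUpperInvariant());
            return true;
        }
    }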


