lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pnas...@apache.org
Subject [Lucene.Net] svn commit: r1230919 [7/22] - in /incubator/lucene.net/branches/Lucene.Net_2_9_4g: ./ build/scripts/ build/vs2010/contrib/ build/vs2010/core/ build/vs2010/demo/ build/vs2010/test/ src/contrib/Analyzers/ src/contrib/Analyzers/CJK/ src/contrib/Analyzers/...
Date Fri, 13 Jan 2012 08:42:38 GMT
Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/DutchStemFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/DutchStemFilter.cs?rev=1230919&r1=1230918&r2=1230919&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/DutchStemFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/DutchStemFilter.cs Fri Jan 13 08:42:34 2012
@@ -1,166 +1,187 @@
-using System;
-using System.IO;
-using System.Collections;
-using System.Collections.Generic;
-
-namespace Lucene.Net.Analysis.Nl
-{
-
-	/* ====================================================================
-	 * The Apache Software License, Version 1.1
-	 *
-	 * Copyright (c) 2001 The Apache Software Foundation.  All rights
-	 * reserved.
-	 *
-	 * Redistribution and use in source and binary forms, with or without
-	 * modification, are permitted provided that the following conditions
-	 * are met:
-	 *
-	 * 1. Redistributions of source code must retain the above copyright
-	 *    notice, this list of conditions and the following disclaimer.
-	 *
-	 * 2. Redistributions in binary form must reproduce the above copyright
-	 *    notice, this list of conditions and the following disclaimer in
-	 *    the documentation and/or other materials provided with the
-	 *    distribution.
-	 *
-	 * 3. The end-user documentation included with the redistribution,
-	 *    if any, must include the following acknowledgment:
-	 *       "This product includes software developed by the
-	 *        Apache Software Foundation (http://www.apache.org/)."
-	 *    Alternately, this acknowledgment may appear in the software itself,
-	 *    if and wherever such third-party acknowledgments normally appear.
-	 *
-	 * 4. The names "Apache" and "Apache Software Foundation" and
-	 *    "Apache Lucene" must not be used to endorse or promote products
-	 *    derived from this software without prior written permission. For
-	 *    written permission, please contact apache@apache.org.
-	 *
-	 * 5. Products derived from this software may not be called "Apache",
-	 *    "Apache Lucene", nor may "Apache" appear in their name, without
-	 *    prior written permission of the Apache Software Foundation.
-	 *
-	 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
-	 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-	 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-	 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
-	 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-	 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-	 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-	 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-	 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-	 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-	 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-	 * SUCH DAMAGE.
-	 * ====================================================================
-	 *
-	 * This software consists of voluntary contributions made by many
-	 * individuals on behalf of the Apache Software Foundation.  For more
-	 * information on the Apache Software Foundation, please see
-	 * <http://www.apache.org/>.
-	 */
-
-	/// <summary>
-	/// A filter that stems Dutch words. It supports a table of words that should
-	/// not be stemmed at all. The stemmer used can be changed at runtime after the
-	/// filter object is created (as long as it is a DutchStemmer).
-	/// 
-	/// <version>$Id: DutchStemFilter.java,v 1.1 2004/03/09 14:55:08 otis Exp $</version>
-	/// </summary>
-	/// <author>Edwin de Jonge</author>
-	public sealed class DutchStemFilter : TokenFilter
-	{
-		/// <summary>
-		/// The actual token in the input stream.
-		/// </summary>
-		private Token token = null;
-		private DutchStemmer stemmer = null;
-        private ICollection<string> exclusions = null;
-    
-		public DutchStemFilter( TokenStream _in ) : base(_in)
-		{
-			stemmer = new DutchStemmer();
-		}
-    
-		/// <summary>
-		/// Builds a DutchStemFilter that uses an exclusiontable. 
-		/// </summary>
-		/// <param name="_in"></param>
-		/// <param name="exclusiontable"></param>
-        public DutchStemFilter(TokenStream _in, ICollection<string> exclusiontable) : this(_in)
-		{
-			exclusions = exclusiontable;
-		}
-
-		/// <summary>
-		/// 
-		/// </summary>
-		/// <param name="_in"></param>
-		/// <param name="exclusiontable"></param>
-		/// <param name="stemdictionary">Dictionary of word stem pairs, that overrule the algorithm</param>
-        public DutchStemFilter(TokenStream _in, ICollection<string> exclusiontable, Dictionary<string,string> stemdictionary) : this(_in, exclusiontable)
-		{
-			stemmer.SetStemDictionary(stemdictionary);
-		}
-
-		/// <summary>
-		/// </summary>
-		/// <returns>Returns the next token in the stream, or null at EOS</returns>
-		public override Token Next()
-	
-		{
-			if ( ( token = input.Next() ) == null ) 
-			{
-				return null;
-			}
-				// Check the exclusiontable
-			else if ( exclusions != null && exclusions.Contains( token.TermText() ) ) 
-			{
-				return token;
-			}
-			else 
-			{
-				String s = stemmer.Stem( token.TermText() );
-				// If not stemmed, dont waste the time creating a new token
-				if ( !s.Equals( token.TermText() ) ) 
-				{
-					return new Token( s, token.StartOffset(),
-						token.EndOffset(), token.Type() );
-				}
-				return token;
-			}
-		}
-
-		/// <summary>
-		/// Set a alternative/custom DutchStemmer for this filter. 
-		/// </summary>
-		/// <param name="stemmer"></param>
-		public void SetStemmer( DutchStemmer stemmer )
-		{
-			if ( stemmer != null ) 
-			{
-				this.stemmer = stemmer;
-			}
-		}
-
-		/// <summary>
-		/// Set an alternative exclusion list for this filter. 
-		/// </summary>
-		/// <param name="exclusiontable"></param>
-        public void SetExclusionTable(ICollection<string> exclusiontable)
-		{
-			exclusions = exclusiontable;
-		}
-
-		/// <summary>
-		/// Set dictionary for stemming, this dictionary overrules the algorithm,
-		/// so you can correct for a particular unwanted word-stem pair.
-		/// </summary>
-		/// <param name="dict"></param>
-		public void SetStemDictionary(Dictionary<string,string> dict)
-		{
-			if (stemmer != null)
-				stemmer.SetStemDictionary(dict);
-		}
-	}
-}
\ No newline at end of file
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.IO;
+using System.Collections;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Nl
+{
+
+	/* ====================================================================
+	 * The Apache Software License, Version 1.1
+	 *
+	 * Copyright (c) 2001 The Apache Software Foundation.  All rights
+	 * reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without
+	 * modification, are permitted provided that the following conditions
+	 * are met:
+	 *
+	 * 1. Redistributions of source code must retain the above copyright
+	 *    notice, this list of conditions and the following disclaimer.
+	 *
+	 * 2. Redistributions in binary form must reproduce the above copyright
+	 *    notice, this list of conditions and the following disclaimer in
+	 *    the documentation and/or other materials provided with the
+	 *    distribution.
+	 *
+	 * 3. The end-user documentation included with the redistribution,
+	 *    if any, must include the following acknowledgment:
+	 *       "This product includes software developed by the
+	 *        Apache Software Foundation (http://www.apache.org/)."
+	 *    Alternately, this acknowledgment may appear in the software itself,
+	 *    if and wherever such third-party acknowledgments normally appear.
+	 *
+	 * 4. The names "Apache" and "Apache Software Foundation" and
+	 *    "Apache Lucene" must not be used to endorse or promote products
+	 *    derived from this software without prior written permission. For
+	 *    written permission, please contact apache@apache.org.
+	 *
+	 * 5. Products derived from this software may not be called "Apache",
+	 *    "Apache Lucene", nor may "Apache" appear in their name, without
+	 *    prior written permission of the Apache Software Foundation.
+	 *
+	 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+	 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+	 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+	 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+	 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+	 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+	 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+	 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+	 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+	 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+	 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+	 * SUCH DAMAGE.
+	 * ====================================================================
+	 *
+	 * This software consists of voluntary contributions made by many
+	 * individuals on behalf of the Apache Software Foundation.  For more
+	 * information on the Apache Software Foundation, please see
+	 * <http://www.apache.org/>.
+	 */
+
+	/// <summary>
+	/// A filter that stems Dutch words. It supports a table of words that should
+	/// not be stemmed at all. The stemmer used can be changed at runtime after the
+	/// filter object is created (as long as it is a DutchStemmer).
+	/// 
+	/// <version>$Id: DutchStemFilter.java,v 1.1 2004/03/09 14:55:08 otis Exp $</version>
+	/// </summary>
+	/// <author>Edwin de Jonge</author>
+	public sealed class DutchStemFilter : TokenFilter
+	{
+		/// <summary>
+		/// The actual token in the input stream.
+		/// </summary>
+		private Token token = null;
+		private DutchStemmer stemmer = null;
+        private ICollection<string> exclusions = null;
+    
+		public DutchStemFilter( TokenStream _in ) : base(_in)
+		{
+			stemmer = new DutchStemmer();
+		}
+    
+		/// <summary>
+		/// Builds a DutchStemFilter that uses an exclusiontable. 
+		/// </summary>
+		/// <param name="_in"></param>
+		/// <param name="exclusiontable"></param>
+        public DutchStemFilter(TokenStream _in, ICollection<string> exclusiontable) : this(_in)
+		{
+			exclusions = exclusiontable;
+		}
+
+		/// <summary>
+		/// Builds a DutchStemFilter that uses an exclusiontable and a dictionary of word-stem pairs.
+		/// </summary>
+		/// <param name="_in"></param>
+		/// <param name="exclusiontable"></param>
+		/// <param name="stemdictionary">Dictionary of word stem pairs, that overrule the algorithm</param>
+        public DutchStemFilter(TokenStream _in, ICollection<string> exclusiontable, Dictionary<string,string> stemdictionary) : this(_in, exclusiontable)
+		{
+			stemmer.SetStemDictionary(stemdictionary);
+		}
+
+		/// <summary>
+		/// </summary>
+		/// <returns>Returns the next token in the stream, or null at EOS</returns>
+		public override Token Next()
+	
+		{
+			if ( ( token = input.Next() ) == null ) 
+			{
+				return null;
+			}
+				// Check the exclusiontable
+			else if ( exclusions != null && exclusions.Contains( token.TermText() ) ) 
+			{
+				return token;
+			}
+			else 
+			{
+				String s = stemmer.Stem( token.TermText() );
+				// If not stemmed, don't waste time creating a new token
+				if ( !s.Equals( token.TermText() ) ) 
+				{
+					return new Token( s, token.StartOffset(),
+						token.EndOffset(), token.Type() );
+				}
+				return token;
+			}
+		}
+
+		/// <summary>
+		/// Set an alternative/custom DutchStemmer for this filter.
+		/// </summary>
+		/// <param name="stemmer"></param>
+		public void SetStemmer( DutchStemmer stemmer )
+		{
+			if ( stemmer != null ) 
+			{
+				this.stemmer = stemmer;
+			}
+		}
+
+		/// <summary>
+		/// Set an alternative exclusion list for this filter. 
+		/// </summary>
+		/// <param name="exclusiontable"></param>
+        public void SetExclusionTable(ICollection<string> exclusiontable)
+		{
+			exclusions = exclusiontable;
+		}
+
+		/// <summary>
+		/// Set dictionary for stemming, this dictionary overrules the algorithm,
+		/// so you can correct for a particular unwanted word-stem pair.
+		/// </summary>
+		/// <param name="dict"></param>
+		public void SetStemDictionary(Dictionary<string,string> dict)
+		{
+			if (stemmer != null)
+				stemmer.SetStemDictionary(dict);
+		}
+	}
+}

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/DutchStemmer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/DutchStemmer.cs?rev=1230919&r1=1230918&r2=1230919&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/DutchStemmer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/DutchStemmer.cs Fri Jan 13 08:42:34 2012
@@ -1,486 +1,507 @@
-using System;
-using System.IO;
-using System.Text;
-using System.Collections;
-using System.Collections.Generic;
-
-namespace Lucene.Net.Analysis.Nl
-{
-
-	/* ====================================================================
-	 * The Apache Software License, Version 1.1
-	 *
-	 * Copyright (c) 2001 The Apache Software Foundation.  All rights
-	 * reserved.
-	 *
-	 * Redistribution and use in source and binary forms, with or without
-	 * modification, are permitted provided that the following conditions
-	 * are met:
-	 *
-	 * 1. Redistributions of source code must retain the above copyright
-	 *    notice, this list of conditions and the following disclaimer.
-	 *
-	 * 2. Redistributions in binary form must reproduce the above copyright
-	 *    notice, this list of conditions and the following disclaimer in
-	 *    the documentation and/or other materials provided with the
-	 *    distribution.
-	 *
-	 * 3. The end-user documentation included with the redistribution,
-	 *    if any, must include the following acknowledgment:
-	 *       "This product includes software developed by the
-	 *        Apache Software Foundation (http://www.apache.org/)."
-	 *    Alternately, this acknowledgment may appear in the software itself,
-	 *    if and wherever such third-party acknowledgments normally appear.
-	 *
-	 * 4. The names "Apache" and "Apache Software Foundation" and
-	 *    "Apache Lucene" must not be used to endorse or promote products
-	 *    derived from this software without prior written permission. For
-	 *    written permission, please contact apache@apache.org.
-	 *
-	 * 5. Products derived from this software may not be called "Apache",
-	 *    "Apache Lucene", nor may "Apache" appear in their name, without
-	 *    prior written permission of the Apache Software Foundation.
-	 *
-	 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
-	 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-	 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-	 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
-	 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-	 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-	 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-	 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-	 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-	 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-	 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-	 * SUCH DAMAGE.
-	 * ====================================================================
-	 *
-	 * This software consists of voluntary contributions made by many
-	 * individuals on behalf of the Apache Software Foundation.  For more
-	 * information on the Apache Software Foundation, please see
-	 * <http://www.apache.org/>.
-	 */
-
-	/// <summary>
-	/// A stemmer for Dutch words. The algorithm is an implementation of
-	/// the <see c="http://snowball.tartarus.org/dutch/stemmer.html">dutch stemming</see>
-	/// algorithm in snowball. Snowball is a project of Martin Porter (does Porter Stemmer ring a bell?): 
-	/// 
-	/// @version   $Id: DutchStemmer.java,v 1.1 2004/03/09 14:55:08 otis Exp $
-	/// </summary>
-	/// <author>Edwin de Jonge (ejne@cbs.nl)</author>
-	public class DutchStemmer
-	{
-		/// <summary>
-		/// Buffer for the terms while stemming them. 
-		/// </summary>
-		private StringBuilder sb = new StringBuilder();
-		private bool _removedE;
-		private Dictionary<string,string> _stemDict;
-
-
-		private int _R1;
-		private int _R2;
-
-		/// <summary>
-		/// Stemms the given term to an unique <tt>discriminator</tt>.
-		/// </summary>
-		/// <param name="term">The term that should be stemmed.</param>
-		/// <returns>Discriminator for <tt>term</tt></returns>
-		//TODO convert to internal
-		public string Stem( String term )
-		{
-			term = term.ToLower();
-			if ( !IsStemmable( term ) )
-				return term;
-			if (_stemDict != null && _stemDict.ContainsKey(term))
-				return _stemDict[term] as string;
-			// Reset the StringBuilder.
-			sb.Remove(0, sb.Length);
-			sb.Insert(0, term);
-			// Stemming starts here...
-			Substitute(sb);
-			StoreYandI(sb);
-			_R1 = GetRIndex(sb, 0);
-			_R1 = Math.Max(3,_R1);
-			Step1(sb);
-			Step2(sb);
-			_R2 = GetRIndex(sb, _R1);
-			Step3a(sb);
-			Step3b(sb);
-			Step4(sb);
-			ReStoreYandI(sb);
-			return sb.ToString();
-		}
-
-		private bool enEnding(StringBuilder sb)
-		{
-			string[] enend = new string[]{"ene","en"};
-			foreach(string end in enend)
-			{
-				string s = sb.ToString();
-				int index = s.Length - end.Length;
-				if ( s.EndsWith(end) &&
-					  index >= _R1 && 
-					  IsValidEnEnding(sb,index-1) 
-					)
-				{
-					sb.Remove(index, end.Length);
-					UnDouble(sb,index);
-					return true;
-				}
-			}
-			return false;
-		}
-
-
-		private void Step1(StringBuilder sb)
-		{
-			if (_R1 >= sb.Length)
-				return;
-
-			string s = sb.ToString();
-			int lengthR1 = sb.Length - _R1;
-			int index;
-
-			if (s.EndsWith("heden"))
-			{
-				sb.Replace("heden","heid", _R1, lengthR1);
-				return;
-			}
-
-			if (enEnding(sb))
-				return;
-			
-			if (s.EndsWith("se")              && 
-				 (index = s.Length - 2) >= _R1  &&
-				 IsValidSEnding(sb, index -1)
-				)
-			{
-				sb.Remove(index, 2);
-				return;
-			} 
-			if (s.EndsWith("s") && 
-				(index = s.Length - 1) >= _R1  &&
-				IsValidSEnding(sb, index - 1))
-			{
-				sb.Remove(index, 1);
-			}
-		}
-
-		/// <summary>
-		/// Delete suffix e if in R1 and 
-		/// preceded by a non-vowel, and then undouble the ending
-		/// </summary>
-		/// <param name="sb">string being stemmed</param>
-		private void Step2(StringBuilder sb)
-		{
-			_removedE = false;
-			if (_R1 >= sb.Length)
-				return;
-			string s = sb.ToString();
-			int index = s.Length - 1;
-			if ( index >= _R1   && 
-				 s.EndsWith("e") &&
-				 !IsVowel(sb[index-1]))
-			{
-				sb.Remove(index,1);
-				UnDouble(sb);
-				_removedE = true;
-			}
-		}
-
-		/// <summary>
-		/// Delete "heid"
-		/// </summary>
-		/// <param name="sb">string being stemmed</param>
-		private void Step3a(StringBuilder sb)
-		{
-			if (_R2 >= sb.Length)
-				return;
-			string s = sb.ToString();
-			int index = s.Length - 4;
-			if (s.EndsWith("heid")&& index >= _R2 && sb[index - 1] != 'c')
-			{
-				sb.Remove(index,4); //remove heid
-				enEnding(sb);
-			}
-		}
-
-		/// <summary>
-		/// <p>A d-suffix, or derivational suffix, enables a new word, 
-		/// often with a different grammatical category, or with a different 
-		/// sense, to be built from another word. Whether a d-suffix can be 
-		/// attached is discovered not from the rules of grammar, but by 
-		/// referring to a dictionary. So in English, ness can be added to 
-		/// certain adjectives to form corresponding nouns (littleness, 
-		/// kindness, foolishness ...) but not to all adjectives 
-		/// (not for example, to big, cruel, wise ...) d-suffixes can be 
-		/// used to change meaning, often in rather exotic ways.</p>
-		/// Remove "ing", "end", "ig", "lijk", "baar" and "bar"
-		/// </summary>
-		/// <param name="sb">string being stemmed</param>
-		private void Step3b(StringBuilder sb)
-		{
-			if (_R2 >= sb.Length)
-				return;
-			string s = sb.ToString();
-			int index;
-
-			if ((s.EndsWith("end") || s.EndsWith("ing")) &&
-      		 (index = s.Length - 3) >= _R2
-				)
-			{
-				sb.Remove(index,3);
-				if (sb[index - 2] == 'i' && 
-					 sb[index - 1] == 'g')
-				{
-					if (sb[index - 3] != 'e' & index-2 >= _R2)
-					{
-						index -= 2;
-						sb.Remove(index,2);
-					}
-				}
-				else
-				{
-					UnDouble(sb,index);
-				}
-				return;
-			}
-			if ( s.EndsWith("ig")    &&
-				  (index = s.Length - 2) >= _R2
-				)
-			{
-				if (sb[index - 1] != 'e')
-					sb.Remove(index, 2);
-				return;
-			}
-			if (s.EndsWith("lijk") &&
-				 (index = s.Length - 4) >= _R2
-				)
-			{
-				sb.Remove(index, 4);
-				Step2(sb);
-				return;
-			}
-			if (s.EndsWith("baar") &&
-				(index = s.Length - 4) >= _R2
-				)
-			{
-				sb.Remove(index, 4);
-				return;
-			}
-			if (s.EndsWith("bar")  &&
-				 (index = s.Length - 3) >= _R2
-				)
-			{
-				if (_removedE)
-					sb.Remove(index, 3);
-				return;
-			}
-		}
-
-		/// <summary>
-		/// undouble vowel 
-		/// If the words ends CVD, where C is a non-vowel, D is a non-vowel other than I, and V is double a, e, o or u, remove one of the vowels from V (for example, maan -> man, brood -> brod). 
-		/// </summary>
-		/// <param name="sb">string being stemmed</param>
-		private void Step4(StringBuilder sb)
-		{
-			if (sb.Length < 4)
-				return;
-			string end = sb.ToString(sb.Length - 4,4);
-			char c = end[0];
-			char v1 = end[1];
-			char v2 = end[2];
-			char d = end[3];
-			if (v1 == v2    &&
-				 d != 'I'    &&
-				 v1 != 'i'    &&
-				 IsVowel(v1) &&
-				!IsVowel(d)  &&
-				!IsVowel(c))
-			{
-				sb.Remove(sb.Length - 2, 1);
-			}
-		}
-
-		/// <summary>
-		/// Checks if a term could be stemmed.
-		/// </summary>
-		/// <param name="term"></param>
-		/// <returns>true if, and only if, the given term consists in letters.</returns>
-		private bool IsStemmable( String term )
-		{
-			for ( int c = 0; c < term.Length; c++ ) 
-			{
-				if ( !Char.IsLetter(term[c])) return false;
-			}
-			return true;
-		}
-
-		/// <summary>
-		/// Substitute ä, ë, ï, ö, ü, á , é, í, ó, ú
-		/// </summary>
-		/// <param name="buffer"></param>
-		private void Substitute( StringBuilder buffer )
-		{
-			for ( int i = 0; i < buffer.Length; i++ ) 
-			{
-				switch (buffer[i])
-				{
-					case 'ä':
-					case 'á':
-					{
-						buffer[i] = 'a';
-						break;
-					}
-					case 'ë':
-					case 'é':
-					{
-						buffer[i] = 'e';
-						break;
-					}
-					case 'ü':
-					case 'ú':
-					{
-						buffer[i] = 'u';
-						break;
-					}
-					case 'ï':
-					case 'i':
-					{
-						buffer[i] = 'i';
-						break;
-					}
-					case 'ö':
-					case 'ó':
-					{
-						buffer[i] = 'o';
-						break;
-					}
-				}
-			}
-		}
-
-//		private bool IsValidSEnding(StringBuilder sb)
-//		{
-//			return  IsValidSEnding(sb,sb.Length - 1);
-//		}
-
-		private bool IsValidSEnding(StringBuilder sb, int index)
-		{
-			char c = sb[index];
-			if (IsVowel(c) || c == 'j')
-				return false;
-			return true;
-		}
-
-//		private bool IsValidEnEnding(StringBuilder sb)
-//		{
-//			return IsValidEnEnding(sb,sb.Length - 1);
-//		}
-
-		private bool IsValidEnEnding(StringBuilder sb, int index)
-		{
-			char c = sb[index];
-			if (IsVowel(c))
-				return false;
-			if (c < 3)
-				return false;
-			// ends with "gem"?
-			if (c == 'm' && sb[index - 2] == 'g' && sb[index-1] == 'e')
-				return false;
-			return true;
-		}
-
-		private void UnDouble(StringBuilder sb)
-		{
-			UnDouble(sb, sb.Length);
-		}
-
-		private void UnDouble(StringBuilder sb, int endIndex)
-		{
-			string s = sb.ToString(0, endIndex);
-			if (s.EndsWith("kk") || s.EndsWith("tt") || s.EndsWith("dd") || s.EndsWith("nn") || s.EndsWith("mm") || s.EndsWith("ff"))
-			{
-				sb.Remove(endIndex-1,1);
-			}
-		}
-
-		private int GetRIndex(StringBuilder sb, int start)
-		{
-			if (start == 0) 
-				start = 1;
-			int i = start;
-			for (; i < sb.Length; i++)
-			{
-				//first non-vowel preceded by a vowel
-				if (!IsVowel(sb[i]) && IsVowel(sb[i-1]))
-				{
-					return i + 1;
-				}
-			}
-			return i + 1;
-		}
-
-		private void StoreYandI(StringBuilder sb)
-		{
-			if (sb[0] == 'y')
-				sb[0] = 'Y';
-			//char c;
-			int last = sb.Length - 1;
-			for (int i = 1; i < last; i++)
-			{
-				switch (sb[i])
-				{
-					case 'i':
-					{
-						if (IsVowel(sb[i-1]) && 
-							IsVowel(sb[i+1])
-							)
-							sb[i] = 'I';
-						break;
-					}
-					case 'y':
-					{
-						if (IsVowel(sb[i-1]))
-							sb[i] = 'Y';
-						break;
-					}
-				}
-			}
-			if (last > 0 && sb[last]=='y' && IsVowel(sb[last-1]))
-				sb[last]='Y';
-		}
-
-		private void ReStoreYandI(StringBuilder sb)
-		{
-			sb.Replace("I","i");
-			sb.Replace("Y","y");
-		}
-
-		private bool IsVowel(char c)
-		{
-			switch (c)
-			{
-				case 'e':
-				case 'a':
-				case 'o':
-				case 'i':
-				case 'u':
-				case 'y':
-				case 'è':
-				{
-					return true;
-				}
-			}
-			return false;
-		}
-
-		internal void SetStemDictionary(Dictionary<string,string> dict)
-		{
-			_stemDict = dict;
-		}
-	}
-}
\ No newline at end of file
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.IO;
+using System.Text;
+using System.Collections;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Nl
+{
+
+	/* ====================================================================
+	 * The Apache Software License, Version 1.1
+	 *
+	 * Copyright (c) 2001 The Apache Software Foundation.  All rights
+	 * reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without
+	 * modification, are permitted provided that the following conditions
+	 * are met:
+	 *
+	 * 1. Redistributions of source code must retain the above copyright
+	 *    notice, this list of conditions and the following disclaimer.
+	 *
+	 * 2. Redistributions in binary form must reproduce the above copyright
+	 *    notice, this list of conditions and the following disclaimer in
+	 *    the documentation and/or other materials provided with the
+	 *    distribution.
+	 *
+	 * 3. The end-user documentation included with the redistribution,
+	 *    if any, must include the following acknowledgment:
+	 *       "This product includes software developed by the
+	 *        Apache Software Foundation (http://www.apache.org/)."
+	 *    Alternately, this acknowledgment may appear in the software itself,
+	 *    if and wherever such third-party acknowledgments normally appear.
+	 *
+	 * 4. The names "Apache" and "Apache Software Foundation" and
+	 *    "Apache Lucene" must not be used to endorse or promote products
+	 *    derived from this software without prior written permission. For
+	 *    written permission, please contact apache@apache.org.
+	 *
+	 * 5. Products derived from this software may not be called "Apache",
+	 *    "Apache Lucene", nor may "Apache" appear in their name, without
+	 *    prior written permission of the Apache Software Foundation.
+	 *
+	 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+	 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+	 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+	 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+	 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+	 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+	 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+	 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+	 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+	 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+	 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+	 * SUCH DAMAGE.
+	 * ====================================================================
+	 *
+	 * This software consists of voluntary contributions made by many
+	 * individuals on behalf of the Apache Software Foundation.  For more
+	 * information on the Apache Software Foundation, please see
+	 * <http://www.apache.org/>.
+	 */
+
+	/// <summary>
+	/// A stemmer for Dutch words. The algorithm is an implementation of
+	/// the <see href="http://snowball.tartarus.org/dutch/stemmer.html">Dutch stemming</see>
+	/// algorithm in Snowball. Snowball is a project of Martin Porter (does Porter Stemmer ring a bell?).
+	/// 
+	/// @version   $Id: DutchStemmer.java,v 1.1 2004/03/09 14:55:08 otis Exp $
+	/// </summary>
+	/// <author>Edwin de Jonge (ejne@cbs.nl)</author>
+	public class DutchStemmer
+	{
+		/// <summary>
+		/// Buffer for the terms while stemming them. 
+		/// </summary>
+		private StringBuilder sb = new StringBuilder();
+		private bool _removedE;
+		private Dictionary<string,string> _stemDict;
+
+
+		private int _R1;
+		private int _R2;
+
+		/// <summary>
+		/// Stems the given term to a unique <tt>discriminator</tt>.
+		/// </summary>
+		/// <param name="term">The term that should be stemmed.</param>
+		/// <returns>Discriminator for <tt>term</tt></returns>
+		//TODO convert to internal
+		public string Stem( String term )
+		{
+			term = term.ToLower();
+			if ( !IsStemmable( term ) )
+				return term;
+			if (_stemDict != null && _stemDict.ContainsKey(term))
+				return _stemDict[term] as string;
+			// Reset the StringBuilder.
+			sb.Remove(0, sb.Length);
+			sb.Insert(0, term);
+			// Stemming starts here...
+			Substitute(sb);
+			StoreYandI(sb);
+			_R1 = GetRIndex(sb, 0);
+			_R1 = Math.Max(3,_R1);
+			Step1(sb);
+			Step2(sb);
+			_R2 = GetRIndex(sb, _R1);
+			Step3a(sb);
+			Step3b(sb);
+			Step4(sb);
+			ReStoreYandI(sb);
+			return sb.ToString();
+		}
+
+		private bool enEnding(StringBuilder sb)
+		{
+			string[] enend = new string[]{"ene","en"};
+			foreach(string end in enend)
+			{
+				string s = sb.ToString();
+				int index = s.Length - end.Length;
+				if ( s.EndsWith(end) &&
+					  index >= _R1 && 
+					  IsValidEnEnding(sb,index-1) 
+					)
+				{
+					sb.Remove(index, end.Length);
+					UnDouble(sb,index);
+					return true;
+				}
+			}
+			return false;
+		}
+
+
+		private void Step1(StringBuilder sb)
+		{
+			if (_R1 >= sb.Length)
+				return;
+
+			string s = sb.ToString();
+			int lengthR1 = sb.Length - _R1;
+			int index;
+
+			if (s.EndsWith("heden"))
+			{
+				sb.Replace("heden","heid", _R1, lengthR1);
+				return;
+			}
+
+			if (enEnding(sb))
+				return;
+			
+			if (s.EndsWith("se")              && 
+				 (index = s.Length - 2) >= _R1  &&
+				 IsValidSEnding(sb, index -1)
+				)
+			{
+				sb.Remove(index, 2);
+				return;
+			} 
+			if (s.EndsWith("s") && 
+				(index = s.Length - 1) >= _R1  &&
+				IsValidSEnding(sb, index - 1))
+			{
+				sb.Remove(index, 1);
+			}
+		}
+
+		/// <summary>
+		/// Delete suffix e if in R1 and 
+		/// preceded by a non-vowel, and then undouble the ending
+		/// </summary>
+		/// <param name="sb">string being stemmed</param>
+		private void Step2(StringBuilder sb)
+		{
+			_removedE = false;
+			if (_R1 >= sb.Length)
+				return;
+			string s = sb.ToString();
+			int index = s.Length - 1;
+			if ( index >= _R1   && 
+				 s.EndsWith("e") &&
+				 !IsVowel(sb[index-1]))
+			{
+				sb.Remove(index,1);
+				UnDouble(sb);
+				_removedE = true;
+			}
+		}
+
+		/// <summary>
+		/// Delete "heid"
+		/// </summary>
+		/// <param name="sb">string being stemmed</param>
+		private void Step3a(StringBuilder sb)
+		{
+			if (_R2 >= sb.Length)
+				return;
+			string s = sb.ToString();
+			int index = s.Length - 4;
+			if (s.EndsWith("heid")&& index >= _R2 && sb[index - 1] != 'c')
+			{
+				sb.Remove(index,4); //remove heid
+				enEnding(sb);
+			}
+		}
+
+		/// <summary>
+		/// <p>A d-suffix, or derivational suffix, enables a new word, 
+		/// often with a different grammatical category, or with a different 
+		/// sense, to be built from another word. Whether a d-suffix can be 
+		/// attached is discovered not from the rules of grammar, but by 
+		/// referring to a dictionary. So in English, ness can be added to 
+		/// certain adjectives to form corresponding nouns (littleness, 
+		/// kindness, foolishness ...) but not to all adjectives 
+		/// (not for example, to big, cruel, wise ...) d-suffixes can be 
+		/// used to change meaning, often in rather exotic ways.</p>
+		/// Remove "ing", "end", "ig", "lijk", "baar" and "bar"
+		/// </summary>
+		/// <param name="sb">string being stemmed</param>
+		private void Step3b(StringBuilder sb)
+		{
+			if (_R2 >= sb.Length)
+				return;
+			string s = sb.ToString();
+			int index;
+
+			if ((s.EndsWith("end") || s.EndsWith("ing")) &&
+				 (index = s.Length - 3) >= _R2
+				)
+			{
+				sb.Remove(index,3);
+				if (sb[index - 2] == 'i' && 
+					 sb[index - 1] == 'g')
+				{
+					if (sb[index - 3] != 'e' && index-2 >= _R2) // "ig" goes only if in R2 and not after 'e'
+					{
+						index -= 2;
+						sb.Remove(index,2);
+					}
+				}
+				else
+				{
+					UnDouble(sb,index);
+				}
+				return;
+			}
+			if ( s.EndsWith("ig")    &&
+				  (index = s.Length - 2) >= _R2
+				)
+			{
+				if (sb[index - 1] != 'e')
+					sb.Remove(index, 2);
+				return;
+			}
+			if (s.EndsWith("lijk") &&
+				 (index = s.Length - 4) >= _R2
+				)
+			{
+				sb.Remove(index, 4);
+				Step2(sb); // repeat the e-removal step on the shortened word
+				return;
+			}
+			if (s.EndsWith("baar") &&
+				(index = s.Length - 4) >= _R2
+				)
+			{
+				sb.Remove(index, 4);
+				return;
+			}
+			if (s.EndsWith("bar")  &&
+				 (index = s.Length - 3) >= _R2
+				)
+			{
+				if (_removedE) // only when Step2 actually removed an 'e'
+					sb.Remove(index, 3);
+				return;
+			}
+		}
+
+		/// <summary>
+		/// undouble vowel 
+		/// If the word ends CVD, where C is a non-vowel, D is a non-vowel other than I, and V is double a, e, o or u, remove one of the vowels from V (for example, maan -> man, brood -> brod). 
+		/// </summary>
+		/// <param name="sb">string being stemmed</param>
+		private void Step4(StringBuilder sb)
+		{
+			if (sb.Length < 4)
+				return;
+			string end = sb.ToString(sb.Length - 4,4);
+			char c = end[0];
+			char v1 = end[1];
+			char v2 = end[2];
+			char d = end[3];
+			if (v1 == v2    &&
+				 d != 'I'    &&
+				 v1 != 'i'    &&
+				 IsVowel(v1) &&
+				!IsVowel(d)  &&
+				!IsVowel(c))
+			{
+				sb.Remove(sb.Length - 2, 1);
+			}
+		}
+
+		/// <summary>
+		/// Checks if a term could be stemmed.
+		/// </summary>
+		/// <param name="term"></param>
+		/// <returns>true if, and only if, the given term consists only of letters.</returns>
+		private bool IsStemmable( String term )
+		{
+			for ( int c = 0; c < term.Length; c++ ) 
+			{
+				if ( !Char.IsLetter(term[c])) return false;
+			}
+			return true;
+		}
+
+		/// <summary>
+		/// Replaces ä, ë, ï, ö, ü, á, é, í, ó, ú in place with their unaccented base vowel.
+		/// </summary>
+		/// <param name="buffer">Term buffer to normalize; modified in place.</param>
+		private void Substitute( StringBuilder buffer )
+		{
+			for ( int i = 0; i < buffer.Length; i++ ) 
+			{
+				switch (buffer[i])
+				{
+					case 'ä':
+					case 'á':
+					{
+						buffer[i] = 'a';
+						break;
+					}
+					case 'ë':
+					case 'é':
+					{
+						buffer[i] = 'e';
+						break;
+					}
+					case 'ü':
+					case 'ú':
+					{
+						buffer[i] = 'u';
+						break;
+					}
+					case 'ï':
+					case 'í': // was a plain 'i', which left the acute form unhandled
+					{
+						buffer[i] = 'i';
+						break;
+					}
+					case 'ö':
+					case 'ó':
+					{
+						buffer[i] = 'o';
+						break;
+					}
+				}
+			}
+		}
+
+//		private bool IsValidSEnding(StringBuilder sb)
+//		{
+//			return  IsValidSEnding(sb,sb.Length - 1);
+//		}
+
+		private bool IsValidSEnding(StringBuilder sb, int index)
+		{
+			char c = sb[index];
+			if (IsVowel(c) || c == 'j')
+				return false;
+			return true;
+		}
+
+//		private bool IsValidEnEnding(StringBuilder sb)
+//		{
+//			return IsValidEnEnding(sb,sb.Length - 1);
+//		}
+
+		private bool IsValidEnEnding(StringBuilder sb, int index)
+		{
+			char c = sb[index];
+			if (IsVowel(c))
+				return false;
+			// A former "c < 3" test compared a letter to the number 3 and never fired;
+			// what the "gem" lookbehind below needs is two characters before index.
+			// ends with "gem"?
+			if (index >= 2 && c == 'm' && sb[index - 2] == 'g' && sb[index-1] == 'e')
+				return false;
+			return true;
+		}
+
+		private void UnDouble(StringBuilder sb)
+		{
+			UnDouble(sb, sb.Length);
+		}
+
+		private void UnDouble(StringBuilder sb, int endIndex)
+		{
+			string s = sb.ToString(0, endIndex);
+			if (s.EndsWith("kk") || s.EndsWith("tt") || s.EndsWith("dd") || s.EndsWith("nn") || s.EndsWith("mm") || s.EndsWith("ff"))
+			{
+				sb.Remove(endIndex-1,1);
+			}
+		}
+
+		private int GetRIndex(StringBuilder sb, int start)
+		{
+			if (start == 0) 
+				start = 1;
+			int i = start;
+			for (; i < sb.Length; i++)
+			{
+				//first non-vowel preceded by a vowel
+				if (!IsVowel(sb[i]) && IsVowel(sb[i-1]))
+				{
+					return i + 1;
+				}
+			}
+			return i + 1;
+		}
+
+		private void StoreYandI(StringBuilder sb)
+		{
+			if (sb[0] == 'y')
+				sb[0] = 'Y';
+			//char c;
+			int last = sb.Length - 1;
+			for (int i = 1; i < last; i++)
+			{
+				switch (sb[i])
+				{
+					case 'i':
+					{
+						if (IsVowel(sb[i-1]) && 
+							IsVowel(sb[i+1])
+							)
+							sb[i] = 'I';
+						break;
+					}
+					case 'y':
+					{
+						if (IsVowel(sb[i-1]))
+							sb[i] = 'Y';
+						break;
+					}
+				}
+			}
+			if (last > 0 && sb[last]=='y' && IsVowel(sb[last-1]))
+				sb[last]='Y';
+		}
+
+		private void ReStoreYandI(StringBuilder sb)
+		{
+			sb.Replace("I","i");
+			sb.Replace("Y","y");
+		}
+
+		private bool IsVowel(char c)
+		{
+			switch (c)
+			{
+				case 'e':
+				case 'a':
+				case 'o':
+				case 'i':
+				case 'u':
+				case 'y':
+				case 'è':
+				{
+					return true;
+				}
+			}
+			return false;
+		}
+
+		internal void SetStemDictionary(Dictionary<string,string> dict)
+		{
+			_stemDict = dict;
+		}
+	}
+}

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/WordlistLoader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/WordlistLoader.cs?rev=1230919&r1=1230918&r2=1230919&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/WordlistLoader.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/WordlistLoader.cs Fri Jan 13 08:42:34 2012
@@ -1,181 +1,202 @@
-using System;
-using System.IO;
-using System.Collections;
-using System.Collections.Generic;
-
-namespace Lucene.Net.Analysis.Nl
-{
-
-	/* ====================================================================
-	 * The Apache Software License, Version 1.1
-	 *
-	 * Copyright (c) 2001 The Apache Software Foundation.  All rights
-	 * reserved.
-	 *
-	 * Redistribution and use in source and binary forms, with or without
-	 * modification, are permitted provided that the following conditions
-	 * are met:
-	 *
-	 * 1. Redistributions of source code must retain the above copyright
-	 *    notice, this list of conditions and the following disclaimer.
-	 *
-	 * 2. Redistributions in binary form must reproduce the above copyright
-	 *    notice, this list of conditions and the following disclaimer in
-	 *    the documentation and/or other materials provided with the
-	 *    distribution.
-	 *
-	 * 3. The end-user documentation included with the redistribution,
-	 *    if any, must include the following acknowledgment:
-	 *       "This product includes software developed by the
-	 *        Apache Software Foundation (http://www.apache.org/)."
-	 *    Alternately, this acknowledgment may appear in the software itself,
-	 *    if and wherever such third-party acknowledgments normally appear.
-	 *
-	 * 4. The names "Apache" and "Apache Software Foundation" and
-	 *    "Apache Lucene" must not be used to endorse or promote products
-	 *    derived from this software without prior written permission. For
-	 *    written permission, please contact apache@apache.org.
-	 *
-	 * 5. Products derived from this software may not be called "Apache",
-	 *    "Apache Lucene", nor may "Apache" appear in their name, without
-	 *    prior written permission of the Apache Software Foundation.
-	 *
-	 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
-	 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-	 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-	 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
-	 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-	 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-	 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-	 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-	 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-	 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-	 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-	 * SUCH DAMAGE.
-	 * ====================================================================
-	 *
-	 * This software consists of voluntary contributions made by many
-	 * individuals on behalf of the Apache Software Foundation.  For more
-	 * information on the Apache Software Foundation, please see
-	 * <http://www.apache.org/>.
-	 */
-
-	/// <summary>
-	/// Loads a text file and adds every line as an entry to a Hashtable. Every line
-	/// should contain only one word. If the file is not found or on any error, an
-	/// empty table is returned.
-	/// 
-	/// <version>$Id: WordListLoader.java,v 1.1 2004/03/09 14:55:08 otis Exp $</version>
-	/// </summary>
-	/// <author>Gerhard Schwarz</author>
-	public class WordlistLoader
-	{
-		/// <param name="path">Path to the wordlist</param>
-		/// <param name="wordfile">Name of the wordlist</param>
-		/// <returns></returns>
-        public static ICollection<string> GetWordtable(String path, String wordfile) 
-		{
-			if ( path == null || wordfile == null ) 
-			{
-				return new List<string>();
-			}
-			return GetWordtable(new FileInfo(path + "\\" + wordfile));
-		}
-
-		/// <param name="wordfile">Complete path to the wordlist</param>
-        public static ICollection<string> GetWordtable(String wordfile) 
-		{
-			if ( wordfile == null ) 
-			{
-				return new List<string>();
-			}
-			return GetWordtable( new FileInfo( wordfile ) );
-		}
-
-		/// <summary>
-		/// Reads a stemsdictionary. Each line contains: 
-		/// word \t stem 
-		/// (i.e. tab seperated)
-		/// </summary>
-		/// <param name="wordstemfile"></param>
-		/// <returns>Stem dictionary that overrules, the stemming algorithm</returns>
-        public static Dictionary<string,string> GetStemDict(FileInfo wordstemfile)
-		{
-			if ( wordstemfile == null ) 
-			{
-				return new Dictionary<string,string>();
-			}
-			Dictionary<string,string> result = new Dictionary<string,string>();
-			try 
-			{
-				StreamReader lnr = new StreamReader(wordstemfile.FullName);
-				string line;
-				string[] wordstem;
-				while ((line = lnr.ReadLine()) != null)
-				{
-					wordstem = line.Split(new char[]{'\t'},2);
-					result.Add(wordstem[0], wordstem[1]);
-			   }
-			}
-			catch (IOException) 
-			{
-			}
-			return result;
-		}
-
-		/// <summary>
-		/// 
-		/// </summary>
-		/// <param name="wordfile">File containing the wordlist</param>
-		/// <returns></returns>
-        public static ICollection<string> GetWordtable(FileInfo wordfile) 
-		{
-			if ( wordfile == null ) 
-			{
-				return new List<string>();
-			}
-            ICollection<string> result = null;
-			try 
-			{
-				StreamReader lnr = new StreamReader(wordfile.FullName);
-				String word = null;
-				String[] stopwords = new String[100];
-				int wordcount = 0;
-				while ( ( word = lnr.ReadLine() ) != null ) 
-				{
-					wordcount++;
-					if ( wordcount == stopwords.Length ) 
-					{
-						String[] tmp = new String[stopwords.Length + 50];
-						Array.Copy( stopwords, 0, tmp, 0, wordcount );
-						stopwords = tmp;
-					}
-					stopwords[wordcount-1] = word;
-				}
-				result = MakeWordTable( stopwords, wordcount );
-			}
-				// On error, use an empty table
-			catch (IOException) 
-			{
-				result = new List<string>();
-			}
-			return result;
-		}
-
-		/// <summary>
-		/// Builds the wordlist table.
-		/// </summary>
-		/// <param name="words">Word that where read</param>
-		/// <param name="length">Amount of words that where read into <tt>words</tt></param>
-		/// <returns></returns>
-        private static ICollection<string> MakeWordTable(String[] words, int length) 
-		{
-            List<string> table = new List<string>(length);
-			for ( int i = 0; i < length; i++ ) 
-			{
-				table.Add(words[i]);
-			}
-			return table;
-		}
-	}
-}
\ No newline at end of file
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.IO;
+using System.Collections;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Nl
+{
+
+	/* ====================================================================
+	 * The Apache Software License, Version 1.1
+	 *
+	 * Copyright (c) 2001 The Apache Software Foundation.  All rights
+	 * reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without
+	 * modification, are permitted provided that the following conditions
+	 * are met:
+	 *
+	 * 1. Redistributions of source code must retain the above copyright
+	 *    notice, this list of conditions and the following disclaimer.
+	 *
+	 * 2. Redistributions in binary form must reproduce the above copyright
+	 *    notice, this list of conditions and the following disclaimer in
+	 *    the documentation and/or other materials provided with the
+	 *    distribution.
+	 *
+	 * 3. The end-user documentation included with the redistribution,
+	 *    if any, must include the following acknowledgment:
+	 *       "This product includes software developed by the
+	 *        Apache Software Foundation (http://www.apache.org/)."
+	 *    Alternately, this acknowledgment may appear in the software itself,
+	 *    if and wherever such third-party acknowledgments normally appear.
+	 *
+	 * 4. The names "Apache" and "Apache Software Foundation" and
+	 *    "Apache Lucene" must not be used to endorse or promote products
+	 *    derived from this software without prior written permission. For
+	 *    written permission, please contact apache@apache.org.
+	 *
+	 * 5. Products derived from this software may not be called "Apache",
+	 *    "Apache Lucene", nor may "Apache" appear in their name, without
+	 *    prior written permission of the Apache Software Foundation.
+	 *
+	 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+	 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+	 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+	 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+	 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+	 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+	 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+	 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+	 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+	 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+	 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+	 * SUCH DAMAGE.
+	 * ====================================================================
+	 *
+	 * This software consists of voluntary contributions made by many
+	 * individuals on behalf of the Apache Software Foundation.  For more
+	 * information on the Apache Software Foundation, please see
+	 * <http://www.apache.org/>.
+	 */
+
+	/// <summary>
+	/// Loads a text file and adds every line as an entry to a word list. Every line
+	/// should contain only one word. If the file is not found or cannot be read, an
+	/// empty list is returned.
+	/// 
+	/// <version>$Id: WordListLoader.java,v 1.1 2004/03/09 14:55:08 otis Exp $</version>
+	/// </summary>
+	/// <author>Gerhard Schwarz</author>
+	public class WordlistLoader
+	{
+		/// <param name="path">Path to the wordlist</param>
+		/// <param name="wordfile">Name of the wordlist</param>
+		/// <returns></returns>
+        public static ICollection<string> GetWordtable(String path, String wordfile) 
+		{
+			if ( path == null || wordfile == null ) 
+			{
+				return new List<string>();
+			}
+			return GetWordtable(new FileInfo(Path.Combine(path, wordfile))); // platform-correct separator
+		}
+
+		/// <param name="wordfile">Complete path to the wordlist</param>
+        public static ICollection<string> GetWordtable(String wordfile) 
+		{
+			if ( wordfile == null ) 
+			{
+				return new List<string>();
+			}
+			return GetWordtable( new FileInfo( wordfile ) );
+		}
+
+		/// <summary>
+		/// Reads a stem dictionary. Each line contains: 
+		/// word \t stem 
+		/// (i.e. tab separated). Lines without a tab are skipped.
+		/// </summary>
+		/// <param name="wordstemfile">File to read; null yields an empty dictionary.</param>
+		/// <returns>Stem dictionary that overrules the stemming algorithm</returns>
+        public static Dictionary<string,string> GetStemDict(FileInfo wordstemfile)
+		{
+			Dictionary<string,string> result = new Dictionary<string,string>();
+			if ( wordstemfile == null ) 
+				return result;
+			try 
+			{
+				using (StreamReader lnr = new StreamReader(wordstemfile.FullName)) // closed on every path
+				{
+					string line;
+					while ((line = lnr.ReadLine()) != null)
+					{
+						string[] wordstem = line.Split(new char[]{'\t'},2);
+						if (wordstem.Length == 2) // e.g. a blank trailing line has no stem
+							result.Add(wordstem[0], wordstem[1]);
+					}
+				}
+			}
+			catch (IOException) // best effort: keep the entries read so far
+			{
+			}
+			return result;
+		}
+
+		/// <summary>
+		/// Reads a wordlist file, one word per line.
+		/// If the file cannot be read, an empty list is returned.
+		/// </summary>
+		/// <param name="wordfile">File containing the wordlist</param>
+		/// <returns>The words in file order; an empty list when
+		/// <c>wordfile</c> is null.</returns>
+        public static ICollection<string> GetWordtable(FileInfo wordfile) 
+		{
+			if ( wordfile == null ) 
+			{
+				return new List<string>();
+			}
+			// Words are collected in a List, which grows on demand,
+			// so no manual buffer management is needed here.
+			List<string> stopwords = new List<string>();
+			ICollection<string> result;
+			try 
+			{
+				// "using" closes the file handle on every path,
+				// including when ReadLine throws part-way through.
+				using (StreamReader lnr = new StreamReader(wordfile.FullName))
+				{
+					String word;
+					while ( ( word = lnr.ReadLine() ) != null ) 
+					{
+						stopwords.Add(word);
+					}
+				}
+				result = MakeWordTable( stopwords.ToArray(), stopwords.Count );
+			}
+				// On error, use an empty table
+			catch (IOException) 
+			{
+				result = new List<string>();
+			}
+			return result;
+		}
+
+		/// <summary>
+		/// Builds the wordlist table.
+		/// </summary>
+		/// <param name="words">Word that where read</param>
+		/// <param name="length">Amount of words that where read into <tt>words</tt></param>
+		/// <returns></returns>
+        private static ICollection<string> MakeWordTable(String[] words, int length) 
+		{
+            List<string> table = new List<string>(length);
+			for ( int i = 0; i < length; i++ ) 
+			{
+				table.Add(words[i]);
+			}
+			return table;
+		}
+	}
+}

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Properties/AssemblyInfo.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Properties/AssemblyInfo.cs?rev=1230919&r1=1230918&r2=1230919&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Properties/AssemblyInfo.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Properties/AssemblyInfo.cs Fri Jan 13 08:42:34 2012
@@ -1,36 +1,57 @@
-using System.Reflection;
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
-
-// General Information about an assembly is controlled through the following 
-// set of attributes. Change these attribute values to modify the information
-// associated with an assembly.
-[assembly: AssemblyTitle("Lucene.Net.Analyzers")]
-[assembly: AssemblyDescription("")]
-[assembly: AssemblyConfiguration("")]
-[assembly: AssemblyCompany("The Apache Software Foundation")]
-[assembly: AssemblyProduct("Lucene.Net.Analyzers")]
-[assembly: AssemblyCopyright("Copyright 2006 - 2011 The Apache Software Foundation")]
-[assembly: AssemblyTrademark("Copyright 2006 - 2011 The Apache Software Foundation")]
-[assembly: AssemblyCulture("")]
-
-// Setting ComVisible to false makes the types in this assembly not visible 
-// to COM components.  If you need to access a type in this assembly from 
-// COM, set the ComVisible attribute to true on that type.
-[assembly: ComVisible(false)]
-
-// The following GUID is for the ID of the typelib if this project is exposed to COM
-[assembly: Guid("36a962fb-a8be-4238-88c4-32568216e247")]
-
-// Version information for an assembly consists of the following four values:
-//
-//      Major Version
-//      Minor Version 
-//      Build Number
-//      Revision
-//
-// You can specify all the values or you can default the Build and Revision Numbers 
-// by using the '*' as shown below:
-// [assembly: AssemblyVersion("1.0.*")]
-[assembly: AssemblyVersion("2.9.4.2")]
-[assembly: AssemblyFileVersion("2.9.4.2")]
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following 
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Analyzers")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("The Apache Software Foundation")]
+[assembly: AssemblyProduct("Lucene.Net.Analyzers")]
+[assembly: AssemblyCopyright("Copyright 2006 - 2011 The Apache Software Foundation")]
+[assembly: AssemblyTrademark("Copyright 2006 - 2011 The Apache Software Foundation")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible 
+// to COM components.  If you need to access a type in this assembly from 
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("36a962fb-a8be-4238-88c4-32568216e247")]
+
+// Version information for an assembly consists of the following four values:
+//
+//      Major Version
+//      Minor Version 
+//      Build Number
+//      Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers 
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("2.9.4.2")]
+[assembly: AssemblyFileVersion("2.9.4.2")]

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianAnalyzer.cs?rev=1230919&r1=1230918&r2=1230919&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianAnalyzer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianAnalyzer.cs Fri Jan 13 08:42:34 2012
@@ -1,252 +1,273 @@
-using System;
-using System.Text;
-using System.IO;
-using System.Collections;
-using System.Collections.Generic;
-using Lucene.Net.Analysis;
-
-namespace Lucene.Net.Analysis.Ru
-{
-	/// <summary>
-	/// Analyzer for Russian language. Supports an external list of stopwords (words that
-	/// will not be indexed at all).
-	/// A default set of stopwords is used unless an alternative list is specified.
-	/// </summary>
-	public sealed class RussianAnalyzer : Analyzer
-	{
-		// letters
-		private static char A = (char)0;
-		private static char B = (char)1;
-		private static char V = (char)2;
-		private static char G = (char)3;
-		private static char D = (char)4;
-		private static char E = (char)5;
-		private static char ZH = (char)6;
-		private static char Z = (char)7;
-		private static char I = (char)8;
-		private static char I_ = (char)9;
-		private static char K = (char)10;
-		private static char L = (char)11;
-		private static char M = (char)12;
-		private static char N = (char)13;
-		private static char O = (char)14;
-		private static char P = (char)15;
-		private static char R = (char)16;
-		private static char S = (char)17;
-		private static char T = (char)18;
-		private static char U = (char)19;
-		//private static char F = (char)20;
-		private static char X = (char)21;
-		//private static char TS = (char)22;
-		private static char CH = (char)23;
-		private static char SH = (char)24;
-		private static char SHCH = (char)25;
-		//private static char HARD = (char)26;
-		private static char Y = (char)27;
-		private static char SOFT = (char)28;
-		private static char AE = (char)29;
-		private static char IU = (char)30;
-		private static char IA = (char)31;
-
-		/// <summary>
-		/// List of typical Russian stopwords.
-		/// </summary>
-		private static char[][] RUSSIAN_STOP_WORDS = {
-		new char[] {A},
-		new char[] {B, E, Z},
-		new char[] {B, O, L, E, E},
-		new char[] {B, Y},
-		new char[] {B, Y, L},
-		new char[] {B, Y, L, A},
-		new char[] {B, Y, L, I},
-		new char[] {B, Y, L, O},
-		new char[] {B, Y, T, SOFT},
-		new char[] {V},
-		new char[] {V, A, M},
-		new char[] {V, A, S},
-		new char[] {V, E, S, SOFT},
-		new char[] {V, O},
-		new char[] {V, O, T},
-		new char[] {V, S, E},
-		new char[] {V, S, E, G, O},
-		new char[] {V, S, E, X},
-		new char[] {V, Y},
-		new char[] {G, D, E},
-		new char[] {D, A},
-		new char[] {D, A, ZH, E},
-		new char[] {D, L, IA},
-		new char[] {D, O},
-		new char[] {E, G, O},
-		new char[] {E, E},
-		new char[] {E, I_,},
-		new char[] {E, IU},
-		new char[] {E, S, L, I},
-		new char[] {E, S, T, SOFT},
-		new char[] {E, SHCH, E},
-		new char[] {ZH, E},
-		new char[] {Z, A},
-		new char[] {Z, D, E, S, SOFT},
-		new char[] {I},
-		new char[] {I, Z},
-		new char[] {I, L, I},
-		new char[] {I, M},
-		new char[] {I, X},
-		new char[] {K},
-		new char[] {K, A, K},
-		new char[] {K, O},
-		new char[] {K, O, G, D, A},
-		new char[] {K, T, O},
-		new char[] {L, I},
-		new char[] {L, I, B, O},
-		new char[] {M, N, E},
-		new char[] {M, O, ZH, E, T},
-		new char[] {M, Y},
-		new char[] {N, A},
-		new char[] {N, A, D, O},
-		new char[] {N, A, SH},
-		new char[] {N, E},
-		new char[] {N, E, G, O},
-		new char[] {N, E, E},
-		new char[] {N, E, T},
-		new char[] {N, I},
-		new char[] {N, I, X},
-		new char[] {N, O},
-		new char[] {N, U},
-		new char[] {O},
-		new char[] {O, B},
-		new char[] {O, D, N, A, K, O},
-		new char[] {O, N},
-		new char[] {O, N, A},
-		new char[] {O, N, I},
-		new char[] {O, N, O},
-		new char[] {O, T},
-		new char[] {O, CH, E, N, SOFT},
-		new char[] {P, O},
-		new char[] {P, O, D},
-		new char[] {P, R, I},
-		new char[] {S},
-		new char[] {S, O},
-		new char[] {T, A, K},
-		new char[] {T, A, K, ZH, E},
-		new char[] {T, A, K, O, I_},
-		new char[] {T, A, M},
-		new char[] {T, E},
-		new char[] {T, E, M},
-		new char[] {T, O},
-		new char[] {T, O, G, O},
-		new char[] {T, O, ZH, E},
-		new char[] {T, O, I_},
-		new char[] {T, O, L, SOFT, K, O},
-		new char[] {T, O, M},
-		new char[] {T, Y},
-		new char[] {U},
-		new char[] {U, ZH, E},
-		new char[] {X, O, T, IA},
-		new char[] {CH, E, G, O},
-		new char[] {CH, E, I_},
-		new char[] {CH, E, M},
-		new char[] {CH, T, O},
-		new char[] {CH, T, O, B, Y},
-		new char[] {CH, SOFT, E},
-		new char[] {CH, SOFT, IA},
-		new char[] {AE, T, A},
-		new char[] {AE, T, I},
-		new char[] {AE, T, O},
-		new char[] {IA}
-													 };
-
-		/// <summary>
-		/// Contains the stopwords used with the StopFilter.
-		/// </summary>
-        private ICollection<string> stoptable = new List<string>();
-
-		/// <summary>
-		/// Charset for Russian letters.
-	    /// Represents encoding for 32 lowercase Russian letters.
-		/// Predefined charsets can be taken from RussianCharSets class
-		/// </summary>
-		private char[] charset;
-
-		/// <summary>
-		/// Builds an analyzer.
-		/// </summary>
-		public RussianAnalyzer()
-		{
-			this.charset = RussianCharsets.UnicodeRussian;
-			stoptable = StopFilter.MakeStopSet(MakeStopWords(RussianCharsets.UnicodeRussian));
-		}
-
-		/// <summary>
-		/// Builds an analyzer.
-		/// </summary>
-		/// <param name="charset"></param>
-		public RussianAnalyzer(char[] charset)
-		{
-			this.charset = charset;
-			stoptable = StopFilter.MakeStopSet(MakeStopWords(charset));
-		}
-
-		/// <summary>
-		/// Builds an analyzer with the given stop words.
-		/// </summary>
-		/// <param name="charset"></param>
-		/// <param name="stopwords"></param>
-		public RussianAnalyzer(char[] charset, String[] stopwords)
-		{
-			this.charset = charset;
-			stoptable = StopFilter.MakeStopSet(stopwords);
-		}
-
-		/// <summary>
-		/// Takes russian stop words and translates them to a String array, using
-		/// the given charset 
-		/// </summary>
-		/// <param name="charset"></param>
-		/// <returns></returns>
-		private static String[] MakeStopWords(char[] charset)
-		{
-			String[] res = new String[RUSSIAN_STOP_WORDS.Length];
-			for (int i = 0; i < res.Length; i++)
-			{
-				char[] theStopWord = RUSSIAN_STOP_WORDS[i];
-				// translate the word,using the charset
-				StringBuilder theWord = new StringBuilder();
-				for (int j = 0; j < theStopWord.Length; j++)
-				{
-					theWord.Append(charset[theStopWord[j]]);
-				}
-				res[i] = theWord.ToString();
-			}
-			return res;
-		}
-
-		/// <summary>
-		/// Builds an analyzer with the given stop words.
-		/// </summary>
-		/// <param name="charset"></param>
-		/// <param name="stopwords"></param>
-        public RussianAnalyzer(char[] charset, ICollection<string> stopwords)
-		{
-			this.charset = charset;
-			stoptable = stopwords;
-		}
-
-		/// <summary>
-		/// Creates a TokenStream which tokenizes all the text in the provided TextReader.
-		/// </summary>
-		/// <param name="fieldName"></param>
-		/// <param name="reader"></param>
-		/// <returns>
-		///		A TokenStream build from a RussianLetterTokenizer filtered with
-		///     RussianLowerCaseFilter, StopFilter, and RussianStemFilter
-		///  </returns>
-		public override TokenStream TokenStream(String fieldName, TextReader reader)
-		{
-			TokenStream result = new RussianLetterTokenizer(reader, charset);
-			result = new RussianLowerCaseFilter(result, charset);
-			result = new StopFilter(result, stoptable);
-			result = new RussianStemFilter(result, charset);
-			return result;
-		}
-	}
-}
\ No newline at end of file
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.Text;
+using System.IO;
+using System.Collections;
+using System.Collections.Generic;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analysis.Ru
+{
+	/// <summary>
+	/// Analyzer for Russian language. Supports an external list of stopwords (words that
+	/// will not be indexed at all).
+	/// A default set of stopwords is used unless an alternative list is specified.
+	/// </summary>
+	public sealed class RussianAnalyzer : Analyzer
+	{
+		// letters
+		private static char A = (char)0;
+		private static char B = (char)1;
+		private static char V = (char)2;
+		private static char G = (char)3;
+		private static char D = (char)4;
+		private static char E = (char)5;
+		private static char ZH = (char)6;
+		private static char Z = (char)7;
+		private static char I = (char)8;
+		private static char I_ = (char)9;
+		private static char K = (char)10;
+		private static char L = (char)11;
+		private static char M = (char)12;
+		private static char N = (char)13;
+		private static char O = (char)14;
+		private static char P = (char)15;
+		private static char R = (char)16;
+		private static char S = (char)17;
+		private static char T = (char)18;
+		private static char U = (char)19;
+		//private static char F = (char)20;
+		private static char X = (char)21;
+		//private static char TS = (char)22;
+		private static char CH = (char)23;
+		private static char SH = (char)24;
+		private static char SHCH = (char)25;
+		//private static char HARD = (char)26;
+		private static char Y = (char)27;
+		private static char SOFT = (char)28;
+		private static char AE = (char)29;
+		private static char IU = (char)30;
+		private static char IA = (char)31;
+
+		/// <summary>
+		/// List of typical Russian stopwords.
+		/// </summary>
+		private static char[][] RUSSIAN_STOP_WORDS = {
+		new char[] {A},
+		new char[] {B, E, Z},
+		new char[] {B, O, L, E, E},
+		new char[] {B, Y},
+		new char[] {B, Y, L},
+		new char[] {B, Y, L, A},
+		new char[] {B, Y, L, I},
+		new char[] {B, Y, L, O},
+		new char[] {B, Y, T, SOFT},
+		new char[] {V},
+		new char[] {V, A, M},
+		new char[] {V, A, S},
+		new char[] {V, E, S, SOFT},
+		new char[] {V, O},
+		new char[] {V, O, T},
+		new char[] {V, S, E},
+		new char[] {V, S, E, G, O},
+		new char[] {V, S, E, X},
+		new char[] {V, Y},
+		new char[] {G, D, E},
+		new char[] {D, A},
+		new char[] {D, A, ZH, E},
+		new char[] {D, L, IA},
+		new char[] {D, O},
+		new char[] {E, G, O},
+		new char[] {E, E},
+		new char[] {E, I_,},
+		new char[] {E, IU},
+		new char[] {E, S, L, I},
+		new char[] {E, S, T, SOFT},
+		new char[] {E, SHCH, E},
+		new char[] {ZH, E},
+		new char[] {Z, A},
+		new char[] {Z, D, E, S, SOFT},
+		new char[] {I},
+		new char[] {I, Z},
+		new char[] {I, L, I},
+		new char[] {I, M},
+		new char[] {I, X},
+		new char[] {K},
+		new char[] {K, A, K},
+		new char[] {K, O},
+		new char[] {K, O, G, D, A},
+		new char[] {K, T, O},
+		new char[] {L, I},
+		new char[] {L, I, B, O},
+		new char[] {M, N, E},
+		new char[] {M, O, ZH, E, T},
+		new char[] {M, Y},
+		new char[] {N, A},
+		new char[] {N, A, D, O},
+		new char[] {N, A, SH},
+		new char[] {N, E},
+		new char[] {N, E, G, O},
+		new char[] {N, E, E},
+		new char[] {N, E, T},
+		new char[] {N, I},
+		new char[] {N, I, X},
+		new char[] {N, O},
+		new char[] {N, U},
+		new char[] {O},
+		new char[] {O, B},
+		new char[] {O, D, N, A, K, O},
+		new char[] {O, N},
+		new char[] {O, N, A},
+		new char[] {O, N, I},
+		new char[] {O, N, O},
+		new char[] {O, T},
+		new char[] {O, CH, E, N, SOFT},
+		new char[] {P, O},
+		new char[] {P, O, D},
+		new char[] {P, R, I},
+		new char[] {S},
+		new char[] {S, O},
+		new char[] {T, A, K},
+		new char[] {T, A, K, ZH, E},
+		new char[] {T, A, K, O, I_},
+		new char[] {T, A, M},
+		new char[] {T, E},
+		new char[] {T, E, M},
+		new char[] {T, O},
+		new char[] {T, O, G, O},
+		new char[] {T, O, ZH, E},
+		new char[] {T, O, I_},
+		new char[] {T, O, L, SOFT, K, O},
+		new char[] {T, O, M},
+		new char[] {T, Y},
+		new char[] {U},
+		new char[] {U, ZH, E},
+		new char[] {X, O, T, IA},
+		new char[] {CH, E, G, O},
+		new char[] {CH, E, I_},
+		new char[] {CH, E, M},
+		new char[] {CH, T, O},
+		new char[] {CH, T, O, B, Y},
+		new char[] {CH, SOFT, E},
+		new char[] {CH, SOFT, IA},
+		new char[] {AE, T, A},
+		new char[] {AE, T, I},
+		new char[] {AE, T, O},
+		new char[] {IA}
+													 };
+
+		/// <summary>
+		/// Contains the stopwords used with the StopFilter.
+		/// </summary>
+        private ICollection<string> stoptable = new List<string>();
+
+		/// <summary>
+		/// Charset for Russian letters.
+	    /// Represents encoding for 32 lowercase Russian letters.
+		/// Predefined charsets can be taken from the RussianCharsets class
+		/// </summary>
+		private char[] charset;
+
+		/// <summary>
+		/// Builds an analyzer.
+		/// </summary>
+		public RussianAnalyzer()
+		{
+			this.charset = RussianCharsets.UnicodeRussian;
+			stoptable = StopFilter.MakeStopSet(MakeStopWords(RussianCharsets.UnicodeRussian));
+		}
+
+		/// <summary>
+		/// Builds an analyzer.
+		/// </summary>
+		/// <param name="charset"></param>
+		public RussianAnalyzer(char[] charset)
+		{
+			this.charset = charset;
+			stoptable = StopFilter.MakeStopSet(MakeStopWords(charset));
+		}
+
+		/// <summary>
+		/// Builds an analyzer with the given stop words.
+		/// </summary>
+		/// <param name="charset"></param>
+		/// <param name="stopwords"></param>
+		public RussianAnalyzer(char[] charset, String[] stopwords)
+		{
+			this.charset = charset;
+			stoptable = StopFilter.MakeStopSet(stopwords);
+		}
+
+		/// <summary>
+		/// Takes Russian stop words and translates them to a String array, using
+		/// the given charset 
+		/// </summary>
+		/// <param name="charset"></param>
+		/// <returns></returns>
+		private static String[] MakeStopWords(char[] charset)
+		{
+			String[] res = new String[RUSSIAN_STOP_WORDS.Length];
+			for (int i = 0; i < res.Length; i++)
+			{
+				char[] theStopWord = RUSSIAN_STOP_WORDS[i];
+				// translate the word, using the charset
+				StringBuilder theWord = new StringBuilder();
+				for (int j = 0; j < theStopWord.Length; j++)
+				{
+					theWord.Append(charset[theStopWord[j]]);
+				}
+				res[i] = theWord.ToString();
+			}
+			return res;
+		}
+
+		/// <summary>
+		/// Builds an analyzer with the given stop words.
+		/// </summary>
+		/// <param name="charset"></param>
+		/// <param name="stopwords"></param>
+        public RussianAnalyzer(char[] charset, ICollection<string> stopwords)
+		{
+			this.charset = charset;
+			stoptable = stopwords;
+		}
+
+		/// <summary>
+		/// Creates a TokenStream which tokenizes all the text in the provided TextReader.
+		/// </summary>
+		/// <param name="fieldName"></param>
+		/// <param name="reader"></param>
+		/// <returns>
+		///		A TokenStream built from a RussianLetterTokenizer filtered with
+		///     RussianLowerCaseFilter, StopFilter, and RussianStemFilter
+		///  </returns>
+		public override TokenStream TokenStream(String fieldName, TextReader reader)
+		{
+			TokenStream result = new RussianLetterTokenizer(reader, charset);
+			result = new RussianLowerCaseFilter(result, charset);
+			result = new StopFilter(result, stoptable);
+			result = new RussianStemFilter(result, charset);
+			return result;
+		}
+	}
+}



Mime
View raw message