lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pnas...@apache.org
Subject [Lucene.Net] svn commit: r1230919 [8/22] - in /incubator/lucene.net/branches/Lucene.Net_2_9_4g: ./ build/scripts/ build/vs2010/contrib/ build/vs2010/core/ build/vs2010/demo/ build/vs2010/test/ src/contrib/Analyzers/ src/contrib/Analyzers/CJK/ src/contrib/Analyzers/...
Date Fri, 13 Jan 2012 08:42:38 GMT
Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianCharsets.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianCharsets.cs?rev=1230919&r1=1230918&r2=1230919&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianCharsets.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianCharsets.cs Fri Jan 13 08:42:34 2012
@@ -1,270 +1,291 @@
-using System;
-
-namespace Lucene.Net.Analysis.Ru
-{
-	/// <summary>
-	/// RussianCharsets class contains encodings schemes (charsets) and ToLowerCase() method implementation
-	/// for russian characters in Unicode, KOI8 and CP1252.
-	/// Each encoding scheme contains lowercase (positions 0-31) and uppercase (position 32-63) characters.
-	/// One should be able to add other encoding schemes (like ISO-8859-5 or customized) by adding a new charset
-	/// and adding logic to ToLowerCase() method for that charset.
-	/// </summary>
-	public class RussianCharsets
-	{
-		/// <summary>
-		/// Unicode Russian charset (lowercase letters only)
-		/// </summary>
-		public static char[] UnicodeRussian = {
-												  '\u0430',
-												  '\u0431',
-												  '\u0432',
-												  '\u0433',
-												  '\u0434',
-												  '\u0435',
-												  '\u0436',
-												  '\u0437',
-												  '\u0438',
-												  '\u0439',
-												  '\u043A',
-												  '\u043B',
-												  '\u043C',
-												  '\u043D',
-												  '\u043E',
-												  '\u043F',
-												  '\u0440',
-												  '\u0441',
-												  '\u0442',
-												  '\u0443',
-												  '\u0444',
-												  '\u0445',
-												  '\u0446',
-												  '\u0447',
-												  '\u0448',
-												  '\u0449',
-												  '\u044A',
-												  '\u044B',
-												  '\u044C',
-												  '\u044D',
-												  '\u044E',
-												  '\u044F',
-												  // upper case
-												  '\u0410',
-												  '\u0411',
-												  '\u0412',
-												  '\u0413',
-												  '\u0414',
-												  '\u0415',
-												  '\u0416',
-												  '\u0417',
-												  '\u0418',
-												  '\u0419',
-												  '\u041A',
-												  '\u041B',
-												  '\u041C',
-												  '\u041D',
-												  '\u041E',
-												  '\u041F',
-												  '\u0420',
-												  '\u0421',
-												  '\u0422',
-												  '\u0423',
-												  '\u0424',
-												  '\u0425',
-												  '\u0426',
-												  '\u0427',
-												  '\u0428',
-												  '\u0429',
-												  '\u042A',
-												  '\u042B',
-												  '\u042C',
-												  '\u042D',
-												  '\u042E',
-												  '\u042F'
-											  };
-
-		/// <summary>
-		/// KOI8 charset
-		/// </summary>
-		public static char[] KOI8 = {
-										(char)0xc1,
-										(char)0xc2,
-										(char)0xd7,
-										(char)0xc7,
-										(char)0xc4,
-										(char)0xc5,
-										(char)0xd6,
-										(char)0xda,
-										(char)0xc9,
-										(char)0xca,
-										(char)0xcb,
-										(char)0xcc,
-										(char)0xcd,
-										(char)0xce,
-										(char)0xcf,
-										(char)0xd0,
-										(char)0xd2,
-										(char)0xd3,
-										(char)0xd4,
-										(char)0xd5,
-										(char)0xc6,
-										(char)0xc8,
-										(char)0xc3,
-										(char)0xde,
-										(char)0xdb,
-										(char)0xdd,
-										(char)0xdf,
-										(char)0xd9,
-										(char)0xd8,
-										(char)0xdc,
-										(char)0xc0,
-										(char)0xd1,
-										// upper case
-										(char)0xe1,
-										(char)0xe2,
-										(char)0xf7,
-										(char)0xe7,
-										(char)0xe4,
-										(char)0xe5,
-										(char)0xf6,
-										(char)0xfa,
-										(char)0xe9,
-										(char)0xea,
-										(char)0xeb,
-										(char)0xec,
-										(char)0xed,
-										(char)0xee,
-										(char)0xef,
-										(char)0xf0,
-										(char)0xf2,
-										(char)0xf3,
-										(char)0xf4,
-										(char)0xf5,
-										(char)0xe6,
-										(char)0xe8,
-										(char)0xe3,
-										(char)0xfe,
-										(char)0xfb,
-										(char)0xfd,
-										(char)0xff,
-										(char)0xf9,
-										(char)0xf8,
-										(char)0xfc,
-										(char)0xe0,
-										(char)0xf1
-									};
-
-		/// <summary>
-		/// CP1251 Charset
-		/// </summary>
-		public static char[] CP1251 = {
-										  (char)0xE0,
-										  (char)0xE1,
-										  (char)0xE2,
-										  (char)0xE3,
-										  (char)0xE4,
-										  (char)0xE5,
-										  (char)0xE6,
-										  (char)0xE7,
-										  (char)0xE8,
-										  (char)0xE9,
-										  (char)0xEA,
-										  (char)0xEB,
-										  (char)0xEC,
-										  (char)0xED,
-										  (char)0xEE,
-										  (char)0xEF,
-										  (char)0xF0,
-										  (char)0xF1,
-										  (char)0xF2,
-										  (char)0xF3,
-										  (char)0xF4,
-										  (char)0xF5,
-										  (char)0xF6,
-										  (char)0xF7,
-										  (char)0xF8,
-										  (char)0xF9,
-										  (char)0xFA,
-										  (char)0xFB,
-										  (char)0xFC,
-										  (char)0xFD,
-										  (char)0xFE,
-										  (char)0xFF,
-										  // upper case
-										  (char)0xC0,
-										  (char)0xC1,
-										  (char)0xC2,
-										  (char)0xC3,
-										  (char)0xC4,
-										  (char)0xC5,
-										  (char)0xC6,
-										  (char)0xC7,
-										  (char)0xC8,
-										  (char)0xC9,
-										  (char)0xCA,
-										  (char)0xCB,
-										  (char)0xCC,
-										  (char)0xCD,
-										  (char)0xCE,
-										  (char)0xCF,
-										  (char)0xD0,
-										  (char)0xD1,
-										  (char)0xD2,
-										  (char)0xD3,
-										  (char)0xD4,
-										  (char)0xD5,
-										  (char)0xD6,
-										  (char)0xD7,
-										  (char)0xD8,
-										  (char)0xD9,
-										  (char)0xDA,
-										  (char)0xDB,
-										  (char)0xDC,
-										  (char)0xDD,
-										  (char)0xDE,
-										  (char)0xDF
-									  };
-
-		public static char ToLowerCase(char letter, char[] charset)
-		{
-			if (charset == UnicodeRussian)
-			{
-				if (letter >= '\u0430' && letter <= '\u044F')
-				{
-					return letter;
-				}
-				if (letter >= '\u0410' && letter <= '\u042F')
-				{
-					return (char) (letter + 32);
-				}
-			}
-
-			if (charset == KOI8)
-			{
-				if (letter >= 0xe0 && letter <= 0xff)
-				{
-					return (char) (letter - 32);
-				}
-				if (letter >= 0xc0 && letter <= 0xdf)
-				{
-					return letter;
-				}
-
-			}
-
-			if (charset == CP1251)
-			{
-				if (letter >= 0xC0 && letter <= 0xDF)
-				{
-					return (char) (letter + 32);
-				}
-				if (letter >= 0xE0 && letter <= 0xFF)
-				{
-					return letter;
-				}
-
-			}
-
-			return Char.ToLower(letter);
-		}
-	}
-}
\ No newline at end of file
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+
+namespace Lucene.Net.Analysis.Ru
+{
+	/// <summary>
+	/// RussianCharsets class contains encoding schemes (charsets) and a ToLowerCase() method implementation
+	/// for Russian characters in Unicode, KOI8 and CP1251.
+	/// Each encoding scheme contains lowercase (positions 0-31) and uppercase (position 32-63) characters.
+	/// One should be able to add other encoding schemes (like ISO-8859-5 or customized) by adding a new charset
+	/// and adding logic to ToLowerCase() method for that charset.
+	/// </summary>
+	public class RussianCharsets
+	{
+		/// <summary>
+		/// Unicode Russian charset (lowercase letters followed by uppercase letters)
+		/// </summary>
+		public static char[] UnicodeRussian = {
+												  '\u0430',
+												  '\u0431',
+												  '\u0432',
+												  '\u0433',
+												  '\u0434',
+												  '\u0435',
+												  '\u0436',
+												  '\u0437',
+												  '\u0438',
+												  '\u0439',
+												  '\u043A',
+												  '\u043B',
+												  '\u043C',
+												  '\u043D',
+												  '\u043E',
+												  '\u043F',
+												  '\u0440',
+												  '\u0441',
+												  '\u0442',
+												  '\u0443',
+												  '\u0444',
+												  '\u0445',
+												  '\u0446',
+												  '\u0447',
+												  '\u0448',
+												  '\u0449',
+												  '\u044A',
+												  '\u044B',
+												  '\u044C',
+												  '\u044D',
+												  '\u044E',
+												  '\u044F',
+												  // upper case
+												  '\u0410',
+												  '\u0411',
+												  '\u0412',
+												  '\u0413',
+												  '\u0414',
+												  '\u0415',
+												  '\u0416',
+												  '\u0417',
+												  '\u0418',
+												  '\u0419',
+												  '\u041A',
+												  '\u041B',
+												  '\u041C',
+												  '\u041D',
+												  '\u041E',
+												  '\u041F',
+												  '\u0420',
+												  '\u0421',
+												  '\u0422',
+												  '\u0423',
+												  '\u0424',
+												  '\u0425',
+												  '\u0426',
+												  '\u0427',
+												  '\u0428',
+												  '\u0429',
+												  '\u042A',
+												  '\u042B',
+												  '\u042C',
+												  '\u042D',
+												  '\u042E',
+												  '\u042F'
+											  };
+
+		/// <summary>
+		/// KOI8 charset
+		/// </summary>
+		public static char[] KOI8 = {
+										(char)0xc1,
+										(char)0xc2,
+										(char)0xd7,
+										(char)0xc7,
+										(char)0xc4,
+										(char)0xc5,
+										(char)0xd6,
+										(char)0xda,
+										(char)0xc9,
+										(char)0xca,
+										(char)0xcb,
+										(char)0xcc,
+										(char)0xcd,
+										(char)0xce,
+										(char)0xcf,
+										(char)0xd0,
+										(char)0xd2,
+										(char)0xd3,
+										(char)0xd4,
+										(char)0xd5,
+										(char)0xc6,
+										(char)0xc8,
+										(char)0xc3,
+										(char)0xde,
+										(char)0xdb,
+										(char)0xdd,
+										(char)0xdf,
+										(char)0xd9,
+										(char)0xd8,
+										(char)0xdc,
+										(char)0xc0,
+										(char)0xd1,
+										// upper case
+										(char)0xe1,
+										(char)0xe2,
+										(char)0xf7,
+										(char)0xe7,
+										(char)0xe4,
+										(char)0xe5,
+										(char)0xf6,
+										(char)0xfa,
+										(char)0xe9,
+										(char)0xea,
+										(char)0xeb,
+										(char)0xec,
+										(char)0xed,
+										(char)0xee,
+										(char)0xef,
+										(char)0xf0,
+										(char)0xf2,
+										(char)0xf3,
+										(char)0xf4,
+										(char)0xf5,
+										(char)0xe6,
+										(char)0xe8,
+										(char)0xe3,
+										(char)0xfe,
+										(char)0xfb,
+										(char)0xfd,
+										(char)0xff,
+										(char)0xf9,
+										(char)0xf8,
+										(char)0xfc,
+										(char)0xe0,
+										(char)0xf1
+									};
+
+		/// <summary>
+		/// CP1251 Charset
+		/// </summary>
+		public static char[] CP1251 = {
+										  (char)0xE0,
+										  (char)0xE1,
+										  (char)0xE2,
+										  (char)0xE3,
+										  (char)0xE4,
+										  (char)0xE5,
+										  (char)0xE6,
+										  (char)0xE7,
+										  (char)0xE8,
+										  (char)0xE9,
+										  (char)0xEA,
+										  (char)0xEB,
+										  (char)0xEC,
+										  (char)0xED,
+										  (char)0xEE,
+										  (char)0xEF,
+										  (char)0xF0,
+										  (char)0xF1,
+										  (char)0xF2,
+										  (char)0xF3,
+										  (char)0xF4,
+										  (char)0xF5,
+										  (char)0xF6,
+										  (char)0xF7,
+										  (char)0xF8,
+										  (char)0xF9,
+										  (char)0xFA,
+										  (char)0xFB,
+										  (char)0xFC,
+										  (char)0xFD,
+										  (char)0xFE,
+										  (char)0xFF,
+										  // upper case
+										  (char)0xC0,
+										  (char)0xC1,
+										  (char)0xC2,
+										  (char)0xC3,
+										  (char)0xC4,
+										  (char)0xC5,
+										  (char)0xC6,
+										  (char)0xC7,
+										  (char)0xC8,
+										  (char)0xC9,
+										  (char)0xCA,
+										  (char)0xCB,
+										  (char)0xCC,
+										  (char)0xCD,
+										  (char)0xCE,
+										  (char)0xCF,
+										  (char)0xD0,
+										  (char)0xD1,
+										  (char)0xD2,
+										  (char)0xD3,
+										  (char)0xD4,
+										  (char)0xD5,
+										  (char)0xD6,
+										  (char)0xD7,
+										  (char)0xD8,
+										  (char)0xD9,
+										  (char)0xDA,
+										  (char)0xDB,
+										  (char)0xDC,
+										  (char)0xDD,
+										  (char)0xDE,
+										  (char)0xDF
+									  };
+
+		public static char ToLowerCase(char letter, char[] charset)
+		{
+			if (charset == UnicodeRussian)
+			{
+				if (letter >= '\u0430' && letter <= '\u044F')
+				{
+					return letter;
+				}
+				if (letter >= '\u0410' && letter <= '\u042F')
+				{
+					return (char) (letter + 32);
+				}
+			}
+
+			if (charset == KOI8)
+			{
+				if (letter >= 0xe0 && letter <= 0xff)
+				{
+					return (char) (letter - 32);
+				}
+				if (letter >= 0xc0 && letter <= 0xdf)
+				{
+					return letter;
+				}
+
+			}
+
+			if (charset == CP1251)
+			{
+				if (letter >= 0xC0 && letter <= 0xDF)
+				{
+					return (char) (letter + 32);
+				}
+				if (letter >= 0xE0 && letter <= 0xFF)
+				{
+					return letter;
+				}
+
+			}
+
+			return Char.ToLower(letter);
+		}
+	}
+}

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianLetterTokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianLetterTokenizer.cs?rev=1230919&r1=1230918&r2=1230919&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianLetterTokenizer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianLetterTokenizer.cs Fri Jan 13 08:42:34 2012
@@ -1,42 +1,63 @@
-using System;
-using System.IO;
-using Lucene.Net.Analysis;
-
-namespace Lucene.Net.Analysis.Ru
-{
-	/// <summary>
-	/// A RussianLetterTokenizer is a tokenizer that extends LetterTokenizer by additionally looking up letters
-	/// in a given "russian charset". The problem with LeterTokenizer is that it uses Character.isLetter() method,
-	/// which doesn't know how to detect letters in encodings like CP1252 and KOI8
-	/// (well-known problems with 0xD7 and 0xF7 chars)
-	/// </summary>
-	public class RussianLetterTokenizer : CharTokenizer
-	{
-		/// <summary>
-		/// Construct a new LetterTokenizer.
-		/// </summary>
-		private char[] charset;
-
-		public RussianLetterTokenizer(TextReader _in, char[] charset) : base(_in)
-		{
-			this.charset = charset;
-		}
-
-		/// <summary>
-		/// Collects only characters which satisfy Char.IsLetter(char).
-		/// </summary>
-		/// <param name="c"></param>
-		/// <returns></returns>
-		protected override bool IsTokenChar(char c)
-		{
-			if (Char.IsLetter(c))
-				return true;
-			for (int i = 0; i < charset.Length; i++)
-			{
-				if (c == charset[i])
-					return true;
-			}
-			return false;
-		}
-	}
-}
\ No newline at end of file
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.IO;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analysis.Ru
+{
+	/// <summary>
+	/// A RussianLetterTokenizer is a tokenizer that extends LetterTokenizer by additionally looking up letters
+	/// in a given "russian charset". The problem with LetterTokenizer is that it uses Character.isLetter() method,
+	/// which doesn't know how to detect letters in encodings like CP1251 and KOI8
+	/// (well-known problems with 0xD7 and 0xF7 chars)
+	/// </summary>
+	public class RussianLetterTokenizer : CharTokenizer
+	{
+		/// <summary>
+		/// Charset whose characters are accepted as token characters in addition to letters.
+		/// </summary>
+		private char[] charset;
+
+		public RussianLetterTokenizer(TextReader _in, char[] charset) : base(_in)
+		{
+			this.charset = charset;
+		}
+
+		/// <summary>
+		/// Accepts characters which satisfy Char.IsLetter(char) or which appear in the given charset.
+		/// </summary>
+		/// <param name="c"></param>
+		/// <returns></returns>
+		protected override bool IsTokenChar(char c)
+		{
+			if (Char.IsLetter(c))
+				return true;
+			for (int i = 0; i < charset.Length; i++)
+			{
+				if (c == charset[i])
+					return true;
+			}
+			return false;
+		}
+	}
+}

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianLowerCaseFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianLowerCaseFilter.cs?rev=1230919&r1=1230918&r2=1230919&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianLowerCaseFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianLowerCaseFilter.cs Fri Jan 13 08:42:34 2012
@@ -1,40 +1,61 @@
-using System;
-using Lucene.Net.Analysis;
-
-namespace Lucene.Net.Analysis.Ru
-{
-	/// <summary>
-	/// Normalizes token text to lower case, analyzing given ("russian") charset.
-	/// </summary>
-	public sealed class RussianLowerCaseFilter : TokenFilter
-	{
-		char[] charset;
-
-		public RussianLowerCaseFilter(TokenStream _in, char[] charset) : base(_in)
-		{
-			this.charset = charset;
-		}
-
-		public override Token Next() 
-		{
-			Token t = input.Next();
-
-			if (t == null)
-				return null;
-
-			String txt = t.TermText();
-
-			char[] chArray = txt.ToCharArray();
-			for (int i = 0; i < chArray.Length; i++)
-			{
-				chArray[i] = RussianCharsets.ToLowerCase(chArray[i], charset);
-			}
-
-			String newTxt = new String(chArray);
-			// create new token
-			Token newToken = new Token(newTxt, t.StartOffset(), t.EndOffset());
-
-			return newToken;
-		}
-	}
-}
\ No newline at end of file
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analysis.Ru
+{
+	/// <summary>
+	/// Normalizes token text to lower case, analyzing given ("russian") charset.
+	/// </summary>
+	public sealed class RussianLowerCaseFilter : TokenFilter
+	{
+		char[] charset;
+
+		public RussianLowerCaseFilter(TokenStream _in, char[] charset) : base(_in)
+		{
+			this.charset = charset;
+		}
+
+		public override Token Next() 
+		{
+			Token t = input.Next();
+
+			if (t == null)
+				return null;
+
+			String txt = t.TermText();
+
+			char[] chArray = txt.ToCharArray();
+			for (int i = 0; i < chArray.Length; i++)
+			{
+				chArray[i] = RussianCharsets.ToLowerCase(chArray[i], charset);
+			}
+
+			String newTxt = new String(chArray);
+			// create new token
+			Token newToken = new Token(newTxt, t.StartOffset(), t.EndOffset());
+
+			return newToken;
+		}
+	}
+}

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianStemFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianStemFilter.cs?rev=1230919&r1=1230918&r2=1230919&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianStemFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianStemFilter.cs Fri Jan 13 08:42:34 2012
@@ -1,58 +1,79 @@
-using System;
-using Lucene.Net.Analysis;
-
-namespace Lucene.Net.Analysis.Ru
-{
-	/// <summary>
-	/// A filter that stems Russian words. The implementation was inspired by GermanStemFilter.
-	/// The input should be filtered by RussianLowerCaseFilter before passing it to RussianStemFilter,
-	/// because RussianStemFilter only works  with lowercase part of any "russian" charset.
-	/// </summary>
-	public sealed class RussianStemFilter : TokenFilter
-	{
-		/// <summary>
-		/// The actual token in the input stream.
-		/// </summary>
-		private Token token = null;
-		private RussianStemmer stemmer = null;
-
-		public RussianStemFilter(TokenStream _in, char[] charset) : base(_in)
-		{
-			stemmer = new RussianStemmer(charset);
-		}
-
-		/// <summary>
-		/// 
-		/// </summary>
-		/// <returns>Returns the next token in the stream, or null at EOS</returns>
-		public override Token Next() 
-		{
-			if ((token = input.Next()) == null)
-			{
-				return null;
-			}
-			else
-			{
-				String s = stemmer.Stem(token.TermText());
-				if (!s.Equals(token.TermText()))
-				{
-					return new Token(s, token.StartOffset(), token.EndOffset(),
-						token.Type());
-				}
-				return token;
-			}
-		}
-
-		/// <summary>
-		/// Set a alternative/custom RussianStemmer for this filter.
-		/// </summary>
-		/// <param name="stemmer"></param>
-		public void SetStemmer(RussianStemmer stemmer)
-		{
-			if (stemmer != null)
-			{
-				this.stemmer = stemmer;
-			}
-		}
-	}
-}
\ No newline at end of file
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analysis.Ru
+{
+	/// <summary>
+	/// A filter that stems Russian words. The implementation was inspired by GermanStemFilter.
+	/// The input should be filtered by RussianLowerCaseFilter before passing it to RussianStemFilter,
+	/// because RussianStemFilter only works with the lowercase part of any "russian" charset.
+	/// </summary>
+	public sealed class RussianStemFilter : TokenFilter
+	{
+		/// <summary>
+		/// The actual token in the input stream.
+		/// </summary>
+		private Token token = null;
+		private RussianStemmer stemmer = null;
+
+		public RussianStemFilter(TokenStream _in, char[] charset) : base(_in)
+		{
+			stemmer = new RussianStemmer(charset);
+		}
+
+		/// <summary>
+		/// 
+		/// </summary>
+		/// <returns>Returns the next token in the stream, or null at EOS</returns>
+		public override Token Next() 
+		{
+			if ((token = input.Next()) == null)
+			{
+				return null;
+			}
+			else
+			{
+				String s = stemmer.Stem(token.TermText());
+				if (!s.Equals(token.TermText()))
+				{
+					return new Token(s, token.StartOffset(), token.EndOffset(),
+						token.Type());
+				}
+				return token;
+			}
+		}
+
+		/// <summary>
+		/// Set an alternative/custom RussianStemmer for this filter.
+		/// </summary>
+		/// <param name="stemmer"></param>
+		public void SetStemmer(RussianStemmer stemmer)
+		{
+			if (stemmer != null)
+			{
+				this.stemmer = stemmer;
+			}
+		}
+	}
+}

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianStemmer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianStemmer.cs?rev=1230919&r1=1230918&r2=1230919&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianStemmer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianStemmer.cs Fri Jan 13 08:42:34 2012
@@ -1,828 +1,849 @@
-using System;
-using System.Text;
-
-namespace Lucene.Net.Analysis.Ru
-{
-	/// <summary>
-	/// Russian stemming algorithm implementation (see http://snowball.sourceforge.net for detailed description).
-	/// </summary>
-	public class RussianStemmer
-	{
-		private char[] charset;
-
-		/// <summary>
-		/// positions of RV, R1 and R2 respectively
-		/// </summary>
-		private int RV, R1, R2;
-
-		/// <summary>
-		/// letters
-		/// </summary>
-		// letters (currently unused letters are commented out)
-		private static char A = (char)0;
-		//private static char B = (char)1;
-		private static char V = (char)2;
-		private static char G = (char)3;
-		//private static char D = (char)4;
-		private static char E = (char)5;
-		//private static char ZH = (char)6;
-		//private static char Z = (char)7;
-		private static char I = (char)8;
-		private static char I_ = (char)9;
-		//private static char K = (char)10;
-		private static char L = (char)11;
-		private static char M = (char)12;
-		private static char N = (char)13;
-		private static char O = (char)14;
-		//private static char P = (char)15;
-		//private static char R = (char)16;
-		private static char S = (char)17;
-		private static char T = (char)18;
-		private static char U = (char)19;
-		//private static char F = (char)20;
-		private static char X = (char)21;
-		//private static char TS = (char)22;
-		//private static char CH = (char)23;
-		private static char SH = (char)24;
-		private static char SHCH = (char)25;
-		//private static char HARD = (char)26;
-		private static char Y = (char)27;
-		private static char SOFT = (char)28;
-		private static char AE = (char)29;
-		private static char IU = (char)30;
-		private static char IA = (char)31;
-
-		/// <summary>
-		/// stem definitions
-		/// </summary>
-		private static char[] vowels = { A, E, I, O, U, Y, AE, IU, IA };
-
-		private static char[][] perfectiveGerundEndings1 = {
-															   new char[] { V },
-															   new char[] { V, SH, I },
-															   new char[] { V, SH, I, S, SOFT }
-														   };
-
-		private static char[][] perfectiveGerund1Predessors = {
-																  new char[] { A },
-																  new char[] { IA }
-															  };
-
-		private static char[][] perfectiveGerundEndings2 = { 
-															   new char[] { I, V }, 
-															   new char[] {Y, V }, 
-															   new char[] {I, V, SH, I }, 
-															   new char[] {Y, V, SH, I }, 
-															   new char[] {I, V, SH, I, S, SOFT }, 
-															   new char[] {Y, V, SH, I, S, SOFT }
-														   };
-
-		private static char[][] adjectiveEndings = {
-													   new char[] { E, E },
-													   new char[] { I, E },
-													   new char[] { Y, E },
-													   new char[] { O, E },
-													   new char[] { E, I_ },
-													   new char[] { I, I_ },
-													   new char[] { Y, I_ },
-													   new char[] { O, I_ },
-													   new char[] { E, M },
-													   new char[] { I, M },
-													   new char[] { Y, M },
-													   new char[] { O, M },
-													   new char[] { I, X },
-													   new char[] { Y, X },
-													   new char[] { U, IU },
-													   new char[] { IU, IU },
-													   new char[] { A, IA },
-													   new char[] { IA, IA },
-													   new char[] { O, IU },
-													   new char[] { E, IU },
-													   new char[] { I, M, I },
-													   new char[] { Y, M, I },
-													   new char[] { E, G, O },
-													   new char[] { O, G, O },
-													   new char[] { E, M, U },
-													   new char[] {O, M, U }
-												   };
-
-		private static char[][] participleEndings1 = {
-														 new char[] { SHCH },
-														 new char[] { E, M },
-														 new char[] { N, N },
-														 new char[] { V, SH },
-														 new char[] { IU, SHCH }
-													 };
-
-		private static char[][] participleEndings2 = {
-														 new char[] { I, V, SH },
-														 new char[] { Y, V, SH },
-														 new char[] { U, IU, SHCH }
-													 };
-
-		private static char[][] participle1Predessors = {
-															new char[] { A },
-															new char[] { IA }
-														};
-
-		private static char[][] reflexiveEndings = {
-													   new char[] { S, IA },
-													   new char[] { S, SOFT }
-												   };
-
-		private static char[][] verbEndings1 = {
-												   new char[] { I_ },
-												   new char[] { L },
-												   new char[] { N },
-												   new char[] { L, O },
-												   new char[] { N, O },
-												   new char[] { E, T },
-												   new char[] { IU, T },
-												   new char[] { L, A },
-												   new char[] { N, A },
-												   new char[] { L, I },
-												   new char[] { E, M },
-												   new char[] { N, Y },
-												   new char[] { E, T, E },
-												   new char[] { I_, T, E },
-												   new char[] { T, SOFT },
-												   new char[] { E, SH, SOFT },
-												   new char[] { N, N, O }
-											   };
-
-		private static char[][] verbEndings2 = {
-												   new char[] { IU },
-												   new char[] { U, IU },
-												   new char[] { E, N },
-												   new char[] { E, I_ },
-												   new char[] { IA, T },
-												   new char[] { U, I_ },
-												   new char[] { I, L },
-												   new char[] { Y, L },
-												   new char[] { I, M },
-												   new char[] { Y, M },
-												   new char[] { I, T },
-												   new char[] { Y, T },
-												   new char[] { I, L, A },
-												   new char[] { Y, L, A },
-												   new char[] { E, N, A },
-												   new char[] { I, T, E },
-												   new char[] { I, L, I },
-												   new char[] { Y, L, I },
-												   new char[] { I, L, O },
-												   new char[] { Y, L, O },
-												   new char[] { E, N, O },
-												   new char[] { U, E, T },
-												   new char[] { U, IU, T },
-												   new char[] { E, N, Y },
-												   new char[] { I, T, SOFT },
-												   new char[] { Y, T, SOFT },
-												   new char[] { I, SH, SOFT },
-												   new char[] { E, I_, T, E },
-												   new char[] { U, I_, T, E }
-											   };
-
-		private static char[][] verb1Predessors = {
-													  new char[] { A },
-													  new char[] { IA }
-												  };
-
-		private static char[][] nounEndings = {
-												  new char[] { A },
-												  new char[] { U },
-												  new char[] { I_ },
-												  new char[] { O },
-												  new char[] { U },
-												  new char[] { E },
-												  new char[] { Y },
-												  new char[] { I },
-												  new char[] { SOFT },
-												  new char[] { IA },
-												  new char[] { E, V },
-												  new char[] { O, V },
-												  new char[] { I, E },
-												  new char[] { SOFT, E },
-												  new char[] { IA, X },
-												  new char[] { I, IU },
-												  new char[] { E, I },
-												  new char[] { I, I },
-												  new char[] { E, I_ },
-												  new char[] { O, I_ },
-												  new char[] { E, M },
-												  new char[] { A, M },
-												  new char[] { O, M },
-												  new char[] { A, X },
-												  new char[] { SOFT, IU },
-												  new char[] { I, IA },
-												  new char[] { SOFT, IA },
-												  new char[] { I, I_ },
-												  new char[] { IA, M },
-												  new char[] { IA, M, I },
-												  new char[] { A, M, I },
-												  new char[] { I, E, I_ },
-												  new char[] { I, IA, M },
-												  new char[] { I, E, M },
-												  new char[] { I, IA, X },
-												  new char[] { I, IA, M, I }
-											  };
-
-		private static char[][] superlativeEndings = {
-														 new char[] { E, I_, SH },
-														 new char[] { E, I_, SH, E }
-													 };
-
-		private static char[][] derivationalEndings = {
-														  new char[] { O, S, T },
-														  new char[] { O, S, T, SOFT }
-													  };
-
-		/// <summary>
-		/// RussianStemmer constructor comment.
-		/// </summary>
-		public RussianStemmer()
-		{
-		}
-
-		/// <summary>
-		/// RussianStemmer constructor comment.
-		/// </summary>
-		/// <param name="charset"></param>
-		public RussianStemmer(char[] charset)
-		{
-			this.charset = charset;
-		}
-
-		/// <summary>
-		/// Adjectival ending is an adjective ending,
-		/// optionally preceded by participle ending.
-		/// Creation date: (17/03/2002 12:14:58 AM)
-		/// </summary>
-		/// <param name="stemmingZone">StringBuilder</param>
-		/// <returns></returns>
-		private bool Adjectival(StringBuilder stemmingZone)
-		{
-			// look for adjective ending in a stemming zone
-			if (!FindAndRemoveEnding(stemmingZone, adjectiveEndings))
-				return false;
-			// if adjective ending was found, try for participle ending
-			bool r =
-				FindAndRemoveEnding(stemmingZone, participleEndings1, participle1Predessors)
-				||
-				FindAndRemoveEnding(stemmingZone, participleEndings2);
-			return true;
-		}
-
-		/// <summary>
-		/// Derivational endings
-		/// Creation date: (17/03/2002 12:14:58 AM)
-		/// </summary>
-		/// <param name="stemmingZone">StringBuilder</param>
-		/// <returns></returns>
-		private bool Derivational(StringBuilder stemmingZone)
-		{
-			int endingLength = FindEnding(stemmingZone, derivationalEndings);
-			if (endingLength == 0)
-				// no derivational ending found
-				return false;
-			else
-			{
-				// Ensure that the ending locates in R2
-				if (R2 - RV <= stemmingZone.Length - endingLength)
-				{
-					stemmingZone.Length = stemmingZone.Length - endingLength;
-					return true;
-				}
-				else
-				{
-					return false;
-				}
-			}
-		}
-
-		/// <summary>
-		/// Finds ending among given ending class and returns the length of ending found(0, if not found).
-		/// Creation date: (17/03/2002 8:18:34 PM)
-		/// </summary>
-		/// <param name="stemmingZone"></param>
-		/// <param name="startIndex"></param>
-		/// <param name="theEndingClass"></param>
-		/// <returns></returns>
-		private int FindEnding(StringBuilder stemmingZone, int startIndex, char[][] theEndingClass)
-		{
-			bool match = false;
-			for (int i = theEndingClass.Length - 1; i >= 0; i--)
-			{
-				char[] theEnding = theEndingClass[i];
-				// check if the ending is bigger than stemming zone
-				if (startIndex < theEnding.Length - 1)
-				{
-					match = false;
-					continue;
-				}
-				match = true;
-				int stemmingIndex = startIndex;
-				for (int j = theEnding.Length - 1; j >= 0; j--)
-				{
-					if (stemmingZone[stemmingIndex--] != charset[theEnding[j]])
-					{
-						match = false;
-						break;
-					}
-				}
-				// check if ending was found
-				if (match)
-				{
-					return theEndingClass[i].Length; // cut ending
-				}
-			}
-			return 0;
-		}
-
-		private int FindEnding(StringBuilder stemmingZone, char[][] theEndingClass)
-		{
-			return FindEnding(stemmingZone, stemmingZone.Length - 1, theEndingClass);
-		}
-
-		/// <summary>
-		/// Finds the ending among the given class of endings and removes it from stemming zone.
-		/// Creation date: (17/03/2002 8:18:34 PM)
-		/// </summary>
-		/// <param name="stemmingZone"></param>
-		/// <param name="theEndingClass"></param>
-		/// <returns></returns>
-		private bool FindAndRemoveEnding(StringBuilder stemmingZone, char[][] theEndingClass)
-		{
-			int endingLength = FindEnding(stemmingZone, theEndingClass);
-			if (endingLength == 0)
-				// not found
-				return false;
-			else 
-			{
-				stemmingZone.Length = stemmingZone.Length - endingLength;
-				// cut the ending found
-				return true;
-			}
-		}
-
-		/// <summary>
-		/// Finds the ending among the given class of endings, then checks if this ending was
-		/// preceded by any of given predessors, and if so, removes it from stemming zone.
-		/// Creation date: (17/03/2002 8:18:34 PM)
-		/// </summary>
-		/// <param name="stemmingZone"></param>
-		/// <param name="theEndingClass"></param>
-		/// <param name="thePredessors"></param>
-		/// <returns></returns>
-		private bool FindAndRemoveEnding(StringBuilder stemmingZone,
-			char[][] theEndingClass, char[][] thePredessors)
-		{
-			int endingLength = FindEnding(stemmingZone, theEndingClass);
-			if (endingLength == 0)
-				// not found
-				return false;
-			else
-			{
-				int predessorLength =
-					FindEnding(stemmingZone,
-					stemmingZone.Length - endingLength - 1,
-					thePredessors);
-				if (predessorLength == 0)
-					return false;
-				else 
-				{
-					stemmingZone.Length = stemmingZone.Length - endingLength;
-					// cut the ending found
-					return true;
-				}
-			}
-
-		}
-
-		/// <summary>
-		/// Marks positions of RV, R1 and R2 in a given word.
-		/// Creation date: (16/03/2002 3:40:11 PM)
-		/// </summary>
-		/// <param name="word"></param>
-		private void MarkPositions(String word)
-		{
-			RV = 0;
-			R1 = 0;
-			R2 = 0;
-			int i = 0;
-			// find RV
-			while (word.Length > i && !IsVowel(word[i]))
-			{
-				i++;
-			}
-			if (word.Length - 1 < ++i)
-				return; // RV zone is empty
-			RV = i;
-			// find R1
-			while (word.Length > i && IsVowel(word[i]))
-			{
-				i++;
-			}
-			if (word.Length - 1 < ++i)
-				return; // R1 zone is empty
-			R1 = i;
-			// find R2
-			while (word.Length > i && !IsVowel(word[i]))
-			{
-				i++;
-			}
-			if (word.Length - 1 < ++i)
-				return; // R2 zone is empty
-			while (word.Length > i && IsVowel(word[i]))
-			{
-				i++;
-			}
-			if (word.Length - 1 < ++i)
-				return; // R2 zone is empty
-			R2 = i;
-		}
-
-		/// <summary>
-		/// Checks if character is a vowel..
-		/// Creation date: (16/03/2002 10:47:03 PM)
-		/// </summary>
-		/// <param name="letter"></param>
-		/// <returns></returns>
-		private bool IsVowel(char letter)
-		{
-			for (int i = 0; i < vowels.Length; i++)
-			{
-				if (letter == charset[vowels[i]])
-					return true;
-			}
-			return false;
-		}
-
-		/// <summary>
-		/// Noun endings.
-		/// Creation date: (17/03/2002 12:14:58 AM)
-		/// </summary>
-		/// <param name="stemmingZone"></param>
-		/// <returns></returns>
-		private bool Noun(StringBuilder stemmingZone)
-		{
-			return FindAndRemoveEnding(stemmingZone, nounEndings);
-		}
-
-		/// <summary>
-		/// Perfective gerund endings.
-		/// Creation date: (17/03/2002 12:14:58 AM)
-		/// </summary>
-		/// <param name="stemmingZone"></param>
-		/// <returns></returns>
-		private bool PerfectiveGerund(StringBuilder stemmingZone)
-		{
-			return FindAndRemoveEnding(
-				stemmingZone,
-				perfectiveGerundEndings1,
-				perfectiveGerund1Predessors)
-				|| FindAndRemoveEnding(stemmingZone, perfectiveGerundEndings2);
-		}
-
-		/// <summary>
-		/// Reflexive endings.
-		/// Creation date: (17/03/2002 12:14:58 AM)
-		/// </summary>
-		/// <param name="stemmingZone"></param>
-		/// <returns></returns>
-		private bool Reflexive(StringBuilder stemmingZone)
-		{
-			return FindAndRemoveEnding(stemmingZone, reflexiveEndings);
-		}
-
-		/// <summary>
-		/// Insert the method's description here.
-		/// Creation date: (17/03/2002 12:14:58 AM)
-		/// </summary>
-		/// <param name="stemmingZone"></param>
-		/// <returns></returns>
-		private bool RemoveI(StringBuilder stemmingZone)
-		{
-			if (stemmingZone.Length > 0
-				&& stemmingZone[stemmingZone.Length - 1] == charset[I])
-			{
-				stemmingZone.Length = stemmingZone.Length - 1;
-				return true;
-			}
-			else
-			{
-				return false;
-			}
-		}
-
-		/// <summary>
-		/// Insert the method's description here.
-		/// Creation date: (17/03/2002 12:14:58 AM)
-		/// </summary>
-		/// <param name="stemmingZone"></param>
-		/// <returns></returns>
-		private bool RemoveSoft(StringBuilder stemmingZone)
-		{
-			if (stemmingZone.Length > 0
-				&& stemmingZone[stemmingZone.Length - 1] == charset[SOFT])
-			{
-				stemmingZone.Length = stemmingZone.Length - 1;
-				return true;
-			}
-			else
-			{
-				return false;
-			}
-		}
-
-		/// <summary>
-		/// Insert the method's description here.
-		/// Creation date: (16/03/2002 10:58:42 PM)
-		/// </summary>
-		/// <param name="newCharset"></param>
-		public void SetCharset(char[] newCharset)
-		{
-			charset = newCharset;
-		}
-
-//		/// <summary>
-//		/// Set ending definition as in Russian stemming algorithm.
-//		/// Creation date: (16/03/2002 11:16:36 PM)
-//		/// </summary>
-//		private void SetEndings()
-//		{
-//			vowels = new char[] { A, E, I, O, U, Y, AE, IU, IA };
-//
-//			perfectiveGerundEndings1 = new char[][] {
-//														new char[]  { V }, new char[]  { V, SH, I }, new char[]  { V, SH, I, S, SOFT }
-//													};
-//
-//			perfectiveGerund1Predessors = new char[][] { 
-//														   new char[]  { A }, new char[]  { IA }
-//													   };
-//
-//			perfectiveGerundEndings2 = new char[][] {
-//														new char[]  { I, V },
-//														new char[]  { Y, V },
-//														new char[]  { I, V, SH, I },
-//														new char[]  { Y, V, SH, I },
-//														new char[]  { I, V, SH, I, S, SOFT },
-//														new char[]  { Y, V, SH, I, S, SOFT }
-//													};
-//
-//			adjectiveEndings = new char[][] {
-//												new char[] { E, E },
-//												new char[] { I, E },
-//												new char[] { Y, E },
-//												new char[] { O, E },
-//												new char[] { E, I_ },
-//												new char[] { I, I_ },
-//												new char[] { Y, I_ },
-//												new char[] { O, I_ },
-//												new char[] { E, M },
-//												new char[] { I, M },
-//												new char[] { Y, M },
-//												new char[] { O, M },
-//												new char[] { I, X },
-//												new char[] { Y, X },
-//												new char[] { U, IU },
-//												new char[] { IU, IU },
-//												new char[] { A, IA },
-//												new char[] { IA, IA },
-//												new char[] { O, IU },
-//												new char[] { E, IU },
-//												new char[] { I, M, I },
-//												new char[]  { Y, M, I },
-//												new char[]  { E, G, O },
-//												new char[]  { O, G, O },
-//												new char[]  { E, M, U },
-//												new char[]  { O, M, U }
-//											};
-//
-//			participleEndings1 = new char[][] {
-//												  new char[]  { SHCH },
-//												  new char[]  { E, M },
-//												  new char[]  { N, N },
-//												  new char[]  { V, SH },
-//												  new char[]  { IU, SHCH }
-//											  };
-//
-//			participleEndings2 = new char[][] {
-//												  new char[]  { I, V, SH },
-//												  new char[]  { Y, V, SH },
-//												  new char[]  { U, IU, SHCH }
-//											  };
-//
-//			participle1Predessors = new char[][] {
-//													 new char[]  { A },
-//													 new char[]  { IA }
-//												 };
-//
-//			reflexiveEndings = new char[][] {
-//												new char[]  { S, IA },
-//												new char[]  { S, SOFT }
-//											};
-//
-//			verbEndings1 = new char[][] {
-//											new char[]  { I_ },
-//											new char[]  { L },
-//											new char[]  { N },
-//											new char[]  { L, O },
-//											new char[]  { N, O },
-//											new char[]  { E, T },
-//											new char[]  { IU, T },
-//											new char[]  { L, A },
-//											new char[]  { N, A },
-//											new char[]  { L, I },
-//											new char[]  { E, M },
-//											new char[]  { N, Y },
-//											new char[]  { E, T, E },
-//											new char[]  { I_, T, E },
-//											new char[]  { T, SOFT },
-//											new char[]  { E, SH, SOFT },
-//											new char[]  { N, N, O }
-//										};
-//
-//			verbEndings2 = new char[][] {
-//											new char[]  { IU },
-//											new char[]  { U, IU },
-//											new char[]  { E, N },
-//											new char[]  { E, I_ },
-//											new char[]  { IA, T },
-//											new char[]  { U, I_ },
-//											new char[]  { I, L },
-//											new char[]  { Y, L },
-//											new char[]  { I, M },
-//											new char[]  { Y, M },
-//											new char[]  { I, T },
-//											new char[]  { Y, T },
-//											new char[]  { I, L, A },
-//											new char[]  { Y, L, A },
-//											new char[]  { E, N, A },
-//											new char[]  { I, T, E },
-//											new char[]  { I, L, I },
-//											new char[]  { Y, L, I },
-//											new char[]  { I, L, O },
-//											new char[]  { Y, L, O },
-//											new char[]  { E, N, O },
-//											new char[]  { U, E, T },
-//											new char[]  { U, IU, T },
-//											new char[]  { E, N, Y },
-//											new char[]  { I, T, SOFT },
-//											new char[]  { Y, T, SOFT },
-//											new char[]  { I, SH, SOFT },
-//											new char[]  { E, I_, T, E },
-//											new char[]  { U, I_, T, E }
-//										};
-//
-//			verb1Predessors = new char[][] {
-//											   new char[]  { A },
-//											   new char[]  { IA }
-//										   };
-//
-//			nounEndings = new char[][] {
-//										   new char[]  { A },
-//										   new char[]  { IU },
-//										   new char[]  { I_ },
-//										   new char[]  { O },
-//										   new char[]  { U },
-//										   new char[]  { E },
-//										   new char[]  { Y },
-//										   new char[]  { I },
-//										   new char[]  { SOFT },
-//										   new char[]  { IA },
-//										   new char[]  { E, V },
-//										   new char[]  { O, V },
-//										   new char[]  { I, E },
-//										   new char[]  { SOFT, E },
-//										   new char[]  { IA, X },
-//										   new char[]  { I, IU },
-//										   new char[]  { E, I },
-//										   new char[]  { I, I },
-//										   new char[]  { E, I_ },
-//										   new char[]  { O, I_ },
-//										   new char[]  { E, M },
-//										   new char[]  { A, M },
-//										   new char[]  { O, M },
-//										   new char[]  { A, X },
-//										   new char[]  { SOFT, IU },
-//										   new char[]  { I, IA },
-//										   new char[]  { SOFT, IA },
-//										   new char[]  { I, I_ },
-//										   new char[]  { IA, M },
-//										   new char[]  { IA, M, I },
-//										   new char[]  { A, M, I },
-//										   new char[]  { I, E, I_ },
-//										   new char[]  { I, IA, M },
-//										   new char[]  { I, E, M },
-//										   new char[]  { I, IA, X },
-//										   new char[]  { I, IA, M, I }
-//									   };
-//
-//			superlativeEndings = new char[][] {
-//												  new char[]  { E, I_, SH },
-//												  new char[]  { E, I_, SH, E }
-//											  };
-//
-//			derivationalEndings = new char[][] {
-//												   new char[]  { O, S, T },
-//												   new char[]  { O, S, T, SOFT }
-//											   };
-//		}
-
-		/// <summary>
-		/// Finds the stem for given Russian word.
-		/// Creation date: (16/03/2002 3:36:48 PM)
-		/// </summary>
-		/// <param name="input"></param>
-		/// <returns></returns>
-		public String Stem(String input)
-		{
-			MarkPositions(input);
-			if (RV == 0)
-				return input; //RV wasn't detected, nothing to stem
-			StringBuilder stemmingZone = new StringBuilder(input.Substring(RV));
-			// stemming goes on in RV
-			// Step 1
-
-			if (!PerfectiveGerund(stemmingZone))
-			{
-				Reflexive(stemmingZone);
-				bool r =
-					Adjectival(stemmingZone)
-					|| Verb(stemmingZone)
-					|| Noun(stemmingZone);
-			}
-			// Step 2
-			RemoveI(stemmingZone);
-			// Step 3
-			Derivational(stemmingZone);
-			// Step 4
-			Superlative(stemmingZone);
-			UndoubleN(stemmingZone);
-			RemoveSoft(stemmingZone);
-			// return result
-			return input.Substring(0, RV) + stemmingZone.ToString();
-		}
-
-		/// <summary>
-		/// Superlative endings.
-		/// Creation date: (17/03/2002 12:14:58 AM)
-		/// </summary>
-		/// <param name="stemmingZone"></param>
-		/// <returns></returns>
-		private bool Superlative(StringBuilder stemmingZone)
-		{
-			return FindAndRemoveEnding(stemmingZone, superlativeEndings);
-		}
-
-		/// <summary>
-		/// Undoubles N.
-		/// Creation date: (17/03/2002 12:14:58 AM)
-		/// </summary>
-		/// <param name="stemmingZone"></param>
-		/// <returns></returns>
-		private bool UndoubleN(StringBuilder stemmingZone)
-		{
-			char[][] doubleN = {
-			new char[] { N, N }
-							   };
-			if (FindEnding(stemmingZone, doubleN) != 0)
-			{
-				stemmingZone.Length = stemmingZone.Length - 1;
-				return true;
-			}
-			else
-			{
-				return false;
-			}
-		}
-
-		/// <summary>
-		/// Verb endings.
-		/// Creation date: (17/03/2002 12:14:58 AM)
-		/// </summary>
-		/// <param name="stemmingZone"></param>
-		/// <returns></returns>
-		private bool Verb(StringBuilder stemmingZone)
-		{
-			return FindAndRemoveEnding(
-				stemmingZone,
-				verbEndings1,
-				verb1Predessors)
-				|| FindAndRemoveEnding(stemmingZone, verbEndings2);
-		}
-
-		/// <summary>
-		/// Static method for stemming with different charsets
-		/// </summary>
-		/// <param name="theWord"></param>
-		/// <param name="charset"></param>
-		/// <returns></returns>
-		public static String Stem(String theWord, char[] charset)
-		{
-			RussianStemmer stemmer = new RussianStemmer();
-			stemmer.SetCharset(charset);
-			return stemmer.Stem(theWord);
-		}
-	}
-}
\ No newline at end of file
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Ru
+{
+	/// <summary>
+	/// Russian stemming algorithm implementation (see http://snowball.sourceforge.net for detailed description).
+	/// </summary>
+	public class RussianStemmer
+	{
+		private char[] charset;
+
+		/// <summary>
+		/// positions of RV, R1 and R2 respectively
+		/// </summary>
+		private int RV, R1, R2;
+
+		/// <summary>
+		/// Letter codes (0-31) that the tables below use to spell the vowels and the endings.
+		/// </summary>
+		// Currently unused letters are commented out. NOTE(review): presumably indices into charset - confirm.
+		private static char A = (char)0;
+		//private static char B = (char)1;
+		private static char V = (char)2;
+		private static char G = (char)3;
+		//private static char D = (char)4;
+		private static char E = (char)5;
+		//private static char ZH = (char)6;
+		//private static char Z = (char)7;
+		private static char I = (char)8;
+		private static char I_ = (char)9;
+		//private static char K = (char)10;
+		private static char L = (char)11;
+		private static char M = (char)12;
+		private static char N = (char)13;
+		private static char O = (char)14;
+		//private static char P = (char)15;
+		//private static char R = (char)16;
+		private static char S = (char)17;
+		private static char T = (char)18;
+		private static char U = (char)19;
+		//private static char F = (char)20;
+		private static char X = (char)21;
+		//private static char TS = (char)22;
+		//private static char CH = (char)23;
+		private static char SH = (char)24;
+		private static char SHCH = (char)25;
+		//private static char HARD = (char)26;
+		private static char Y = (char)27;
+		private static char SOFT = (char)28;
+		private static char AE = (char)29;
+		private static char IU = (char)30;
+		private static char IA = (char)31;
+
+		/// <summary>
+		/// stem definitions
+		/// </summary>
+		private static char[] vowels = { A, E, I, O, U, Y, AE, IU, IA };
+
+		private static char[][] perfectiveGerundEndings1 = {
+															   new char[] { V },
+															   new char[] { V, SH, I },
+															   new char[] { V, SH, I, S, SOFT }
+														   };
+
+		private static char[][] perfectiveGerund1Predessors = {
+																  new char[] { A },
+																  new char[] { IA }
+															  };
+
+		private static char[][] perfectiveGerundEndings2 = { 
+															   new char[] { I, V }, 
+															   new char[] {Y, V }, 
+															   new char[] {I, V, SH, I }, 
+															   new char[] {Y, V, SH, I }, 
+															   new char[] {I, V, SH, I, S, SOFT }, 
+															   new char[] {Y, V, SH, I, S, SOFT }
+														   };
+
+		private static char[][] adjectiveEndings = {
+													   new char[] { E, E },
+													   new char[] { I, E },
+													   new char[] { Y, E },
+													   new char[] { O, E },
+													   new char[] { E, I_ },
+													   new char[] { I, I_ },
+													   new char[] { Y, I_ },
+													   new char[] { O, I_ },
+													   new char[] { E, M },
+													   new char[] { I, M },
+													   new char[] { Y, M },
+													   new char[] { O, M },
+													   new char[] { I, X },
+													   new char[] { Y, X },
+													   new char[] { U, IU },
+													   new char[] { IU, IU },
+													   new char[] { A, IA },
+													   new char[] { IA, IA },
+													   new char[] { O, IU },
+													   new char[] { E, IU },
+													   new char[] { I, M, I },
+													   new char[] { Y, M, I },
+													   new char[] { E, G, O },
+													   new char[] { O, G, O },
+													   new char[] { E, M, U },
+													   new char[] {O, M, U }
+												   };
+
+		private static char[][] participleEndings1 = {
+														 new char[] { SHCH },
+														 new char[] { E, M },
+														 new char[] { N, N },
+														 new char[] { V, SH },
+														 new char[] { IU, SHCH }
+													 };
+
+		private static char[][] participleEndings2 = {
+														 new char[] { I, V, SH },
+														 new char[] { Y, V, SH },
+														 new char[] { U, IU, SHCH }
+													 };
+
+		private static char[][] participle1Predessors = {
+															new char[] { A },
+															new char[] { IA }
+														};
+
+		private static char[][] reflexiveEndings = {
+													   new char[] { S, IA },
+													   new char[] { S, SOFT }
+												   };
+
+		private static char[][] verbEndings1 = { // verb endings removed by Verb only when preceded by an entry of verb1Predessors
+												   new char[] { I_ },
+												   new char[] { L },
+												   new char[] { N },
+												   new char[] { L, O },
+												   new char[] { N, O },
+												   new char[] { E, T },
+												   new char[] { IU, T },
+												   new char[] { L, A },
+												   new char[] { N, A },
+												   new char[] { L, I },
+												   new char[] { E, M },
+												   new char[] { N, Y },
+												   new char[] { E, T, E },
+												   new char[] { I_, T, E },
+												   new char[] { T, SOFT },
+												   new char[] { E, SH, SOFT },
+												   new char[] { N, N, O }
+											   };
+
+		private static char[][] verbEndings2 = { // verb endings that Verb tries, with no predecessor check, when verbEndings1 fails
+												   new char[] { IU },
+												   new char[] { U, IU },
+												   new char[] { E, N },
+												   new char[] { E, I_ },
+												   new char[] { IA, T },
+												   new char[] { U, I_ },
+												   new char[] { I, L },
+												   new char[] { Y, L },
+												   new char[] { I, M },
+												   new char[] { Y, M },
+												   new char[] { I, T },
+												   new char[] { Y, T },
+												   new char[] { I, L, A },
+												   new char[] { Y, L, A },
+												   new char[] { E, N, A },
+												   new char[] { I, T, E },
+												   new char[] { I, L, I },
+												   new char[] { Y, L, I },
+												   new char[] { I, L, O },
+												   new char[] { Y, L, O },
+												   new char[] { E, N, O },
+												   new char[] { U, E, T },
+												   new char[] { U, IU, T },
+												   new char[] { E, N, Y },
+												   new char[] { I, T, SOFT },
+												   new char[] { Y, T, SOFT },
+												   new char[] { I, SH, SOFT },
+												   new char[] { E, I_, T, E },
+												   new char[] { U, I_, T, E }
+											   };
+
+		private static char[][] verb1Predessors = { // one of these must directly precede a verbEndings1 match (see Verb)
+													  new char[] { A },
+													  new char[] { IA }
+												  };
+
+		private static char[][] nounEndings = { // endings removed by Noun; FindEnding scans this table from the last entry to the first
+												  new char[] { A },
+												  new char[] { IU }, // IU, not U: U is listed again below and the disabled SetEndings table has IU here
+												  new char[] { I_ },
+												  new char[] { O },
+												  new char[] { U },
+												  new char[] { E },
+												  new char[] { Y },
+												  new char[] { I },
+												  new char[] { SOFT },
+												  new char[] { IA },
+												  new char[] { E, V },
+												  new char[] { O, V },
+												  new char[] { I, E },
+												  new char[] { SOFT, E },
+												  new char[] { IA, X },
+												  new char[] { I, IU },
+												  new char[] { E, I },
+												  new char[] { I, I },
+												  new char[] { E, I_ },
+												  new char[] { O, I_ },
+												  new char[] { E, M },
+												  new char[] { A, M },
+												  new char[] { O, M },
+												  new char[] { A, X },
+												  new char[] { SOFT, IU },
+												  new char[] { I, IA },
+												  new char[] { SOFT, IA },
+												  new char[] { I, I_ },
+												  new char[] { IA, M },
+												  new char[] { IA, M, I },
+												  new char[] { A, M, I },
+												  new char[] { I, E, I_ },
+												  new char[] { I, IA, M },
+												  new char[] { I, E, M },
+												  new char[] { I, IA, X },
+												  new char[] { I, IA, M, I }
+											  };
+
+		private static char[][] superlativeEndings = { // endings removed by Superlative
+														 new char[] { E, I_, SH },
+														 new char[] { E, I_, SH, E }
+													 };
+
+		private static char[][] derivationalEndings = { // endings removed by Derivational, and only when they start inside R2
+														  new char[] { O, S, T },
+														  new char[] { O, S, T, SOFT }
+													  };
+
+		/// <summary>
+		/// Creates a stemmer without assigning a charset here (empty body); see SetCharset.
+		/// </summary>
+		public RussianStemmer()
+		{
+		}
+
+		/// <summary>
+		/// Creates a stemmer that uses the given charset table.
+		/// </summary>
+		/// <param name="charset">Table stored, without copying, in the charset member</param>
+		public RussianStemmer(char[] charset)
+		{
+			this.charset = charset;
+		}
+
+		/// <summary>
+		/// Strips an adjectival ending: an adjective ending which may itself
+		/// be preceded by a participle ending.
+		/// Creation date: (17/03/2002 12:14:58 AM)
+		/// </summary>
+		/// <param name="stemmingZone">Buffer that is trimmed in place</param>
+		/// <returns>true when an adjective ending was removed, otherwise false</returns>
+		private bool Adjectival(StringBuilder stemmingZone)
+		{
+			bool adjectiveRemoved = FindAndRemoveEnding(stemmingZone, adjectiveEndings);
+			if (adjectiveRemoved)
+			{
+				// The participle part is optional, so its outcome does not affect the result.
+				// Group 1 (needs a predecessor) is tried first, group 2 only if that fails.
+				if (!FindAndRemoveEnding(stemmingZone, participleEndings1, participle1Predessors))
+					FindAndRemoveEnding(stemmingZone, participleEndings2);
+			}
+			return adjectiveRemoved;
+		}
+
+		/// <summary>
+		/// Removes a derivational ending, but only when that ending lies inside R2.
+		/// Creation date: (17/03/2002 12:14:58 AM)
+		/// </summary>
+		/// <param name="stemmingZone">Buffer that is trimmed in place</param>
+		/// <returns>true when an ending was removed, otherwise false</returns>
+		private bool Derivational(StringBuilder stemmingZone)
+		{
+			int suffixLength = FindEnding(stemmingZone, derivationalEndings);
+			// Guard: nothing matched.
+			if (suffixLength == 0)
+			{
+				return false;
+			}
+
+			// Stem builds the zone from input.Substring(RV), so R2 - RV is the R2 offset
+			// inside the buffer; the ending must start at or after that offset.
+			int trimmedLength = stemmingZone.Length - suffixLength;
+			if (trimmedLength < R2 - RV)
+			{
+				return false;
+			}
+
+			stemmingZone.Length = trimmedLength;
+			return true;
+		}
+
+		/// <summary>
+		/// Looks for an ending of the given class that finishes at startIndex and
+		/// returns the length of the ending found (0 if none matches).
+		/// Creation date: (17/03/2002 8:18:34 PM)
+		/// </summary>
+		/// <param name="stemmingZone">Buffer to inspect; it is not modified</param>
+		/// <param name="startIndex">Index of the last character that may belong to the ending</param>
+		/// <param name="theEndingClass">Candidate endings, scanned from the last entry to the first</param>
+		/// <returns>Length of the first candidate that matches, or 0</returns>
+		private int FindEnding(StringBuilder stemmingZone, int startIndex, char[][] theEndingClass)
+		{
+			int candidateIndex = theEndingClass.Length;
+			while (--candidateIndex >= 0)
+			{
+				char[] candidate = theEndingClass[candidateIndex];
+				// a candidate longer than the text up to startIndex cannot match
+				if (candidate.Length - 1 > startIndex)
+					continue;
+
+				// compare right to left; candidate entries are indexes into the charset table
+				int zoneIndex = startIndex;
+				int letter = candidate.Length - 1;
+				while (letter >= 0 && stemmingZone[zoneIndex] == charset[candidate[letter]])
+				{
+					zoneIndex--;
+					letter--;
+				}
+
+				// every letter of the candidate matched
+				if (letter < 0)
+					return candidate.Length;
+			}
+			return 0;
+		}
+
+		/// <summary>
+		/// Same search, anchored at the last character of the stemming zone.
+		/// </summary>
+		private int FindEnding(StringBuilder stemmingZone, char[][] theEndingClass)
+		{
+			int lastIndex = stemmingZone.Length - 1;
+			return FindEnding(stemmingZone, lastIndex, theEndingClass);
+		}
+
+		/// <summary>
+		/// Searches the end of the stemming zone for an ending of the given class
+		/// and, when one is found, truncates the zone by that ending's length.
+		/// Creation date: (17/03/2002 8:18:34 PM)
+		/// </summary>
+		/// <param name="stemmingZone">Buffer that is trimmed in place</param>
+		/// <param name="theEndingClass">Candidate endings</param>
+		/// <returns>true when an ending was cut off, otherwise false</returns>
+		private bool FindAndRemoveEnding(StringBuilder stemmingZone, char[][] theEndingClass)
+		{
+			int matchedLength = FindEnding(stemmingZone, theEndingClass);
+
+			bool found = matchedLength != 0;
+			if (found)
+			{
+				// drop the matched suffix
+				stemmingZone.Length -= matchedLength;
+			}
+			return found;
+		}
+
+		/// <summary>
+		/// Searches the end of the stemming zone for an ending of the given class and
+		/// removes it only when one of the given predecessors sits directly before it.
+		/// When the matched ending lacks a predecessor the call fails outright; other
+		/// candidates of the same class are not retried.
+		/// Creation date: (17/03/2002 8:18:34 PM)
+		/// </summary>
+		/// <param name="stemmingZone">Buffer that is trimmed in place</param>
+		/// <param name="theEndingClass">Candidate endings</param>
+		/// <param name="thePredessors">Sequences, one of which must precede the ending</param>
+		/// <returns>true when an ending was cut off, otherwise false</returns>
+		private bool FindAndRemoveEnding(StringBuilder stemmingZone,
+			char[][] theEndingClass, char[][] thePredessors)
+		{
+			int matchedLength = FindEnding(stemmingZone, theEndingClass);
+			if (matchedLength == 0)
+			{
+				// no ending of this class at the end of the zone
+				return false;
+			}
+
+			// index of the character just before the matched ending
+			int beforeEnding = stemmingZone.Length - matchedLength - 1;
+			if (FindEnding(stemmingZone, beforeEnding, thePredessors) == 0)
+			{
+				// ending present but not preceded by a required sequence: keep it
+				return false;
+			}
+
+			// only the ending is removed; the predecessor stays in the zone
+			stemmingZone.Length -= matchedLength;
+			return true;
+		}
+
+		/// <summary>
+		/// Computes the start offsets of the RV, R1 and R2 regions of a word and stores
+		/// them in the RV, R1 and R2 members; a region that is empty keeps the value 0.
+		/// Creation date: (16/03/2002 3:40:11 PM)
+		/// </summary>
+		/// <param name="word">Word to analyse</param>
+		private void MarkPositions(String word)
+		{
+			RV = 0;
+			R1 = 0;
+			R2 = 0;
+			int length = word.Length;
+			int pos = 0;
+
+			// RV: starts right after the first vowel
+			for (; pos < length && !IsVowel(word[pos]); pos++) { }
+			pos++;
+			if (pos >= length)
+				return; // RV zone is empty
+			RV = pos;
+
+			// R1: skip the vowel run, then one more character
+			// (that character is the first non-vowel after a vowel)
+			for (; pos < length && IsVowel(word[pos]); pos++) { }
+			pos++;
+			if (pos >= length)
+				return; // R1 zone is empty
+			R1 = pos;
+
+			// R2: skip a non-vowel run plus one character, ...
+			for (; pos < length && !IsVowel(word[pos]); pos++) { }
+			pos++;
+			if (pos >= length)
+				return; // R2 zone is empty
+
+			// ... then a vowel run plus one character
+			for (; pos < length && IsVowel(word[pos]); pos++) { }
+			pos++;
+			if (pos >= length)
+				return; // R2 zone is empty
+			R2 = pos;
+		}
+
+		/// <summary>
+		/// Tells whether a character is one of the vowels of the current charset.
+		/// Creation date: (16/03/2002 10:47:03 PM)
+		/// </summary>
+		/// <param name="letter">Character to test</param>
+		/// <returns>true if the character equals the charset entry of any vowel</returns>
+		private bool IsVowel(char letter)
+		{
+			foreach (char vowel in vowels)
+			{
+				if (charset[vowel] == letter)
+					return true;
+			}
+			return false;
+		}
+
+		/// <summary>
+		/// Removes a noun ending from the end of the stemming zone, if one is present.
+		/// Creation date: (17/03/2002 12:14:58 AM)
+		/// </summary>
+		/// <param name="stemmingZone">Buffer that is trimmed in place</param>
+		/// <returns>true when a noun ending was removed, otherwise false</returns>
+		private bool Noun(StringBuilder stemmingZone)
+		{
+			return FindAndRemoveEnding(stemmingZone, nounEndings);
+		}
+
+		/// <summary>
+		/// Removes a perfective gerund ending from the end of the stemming zone.
+		/// Creation date: (17/03/2002 12:14:58 AM)
+		/// </summary>
+		/// <param name="stemmingZone">Buffer that is trimmed in place</param>
+		/// <returns>true when an ending of either group was removed, otherwise false</returns>
+		private bool PerfectiveGerund(StringBuilder stemmingZone)
+		{
+			// group 1 endings count only when preceded by one of their predecessors
+			if (FindAndRemoveEnding(stemmingZone, perfectiveGerundEndings1, perfectiveGerund1Predessors))
+				return true;
+			// otherwise fall back to the group 2 endings, which need no predecessor
+			return FindAndRemoveEnding(stemmingZone, perfectiveGerundEndings2);
+		}
+
+		/// <summary>
+		/// Removes a reflexive ending from the end of the stemming zone, if one is present.
+		/// Creation date: (17/03/2002 12:14:58 AM)
+		/// </summary>
+		/// <param name="stemmingZone">Buffer that is trimmed in place</param>
+		/// <returns>true when a reflexive ending was removed, otherwise false</returns>
+		private bool Reflexive(StringBuilder stemmingZone)
+		{
+			return FindAndRemoveEnding(stemmingZone, reflexiveEndings);
+		}
+
+		/// <summary>
+		/// Removes the last character of the stemming zone when it is the charset's letter I.
+		/// Creation date: (17/03/2002 12:14:58 AM)
+		/// </summary>
+		/// <param name="stemmingZone">Buffer that is trimmed in place</param>
+		/// <returns>true when a character was removed, otherwise false</returns>
+		private bool RemoveI(StringBuilder stemmingZone)
+		{
+			int lastIndex = stemmingZone.Length - 1;
+
+			// nothing to do for an empty zone or a different final letter
+			if (lastIndex < 0 || stemmingZone[lastIndex] != charset[I])
+			{
+				return false;
+			}
+
+			stemmingZone.Length = lastIndex;
+			return true;
+		}
+
+		/// <summary>
+		/// Removes the last character of the stemming zone when it is the charset's SOFT letter.
+		/// Creation date: (17/03/2002 12:14:58 AM)
+		/// </summary>
+		/// <param name="stemmingZone">Buffer that is trimmed in place</param>
+		/// <returns>true when a character was removed, otherwise false</returns>
+		private bool RemoveSoft(StringBuilder stemmingZone)
+		{
+			int lastIndex = stemmingZone.Length - 1;
+
+			// an empty zone, or one ending in another letter, is left untouched
+			if (lastIndex < 0 || stemmingZone[lastIndex] != charset[SOFT])
+			{
+				return false;
+			}
+
+			stemmingZone.Length = lastIndex;
+			return true;
+		}
+
+		/// <summary>
+		/// Replaces the charset table used by this stemmer.
+		/// Creation date: (16/03/2002 10:58:42 PM)
+		/// </summary>
+		/// <param name="newCharset">Table to store in the charset member; no copy is made</param>
+		public void SetCharset(char[] newCharset)
+		{
+			charset = newCharset;
+		}
+
+//		/// <summary>
+//		/// Set ending definition as in Russian stemming algorithm.
+//		/// Creation date: (16/03/2002 11:16:36 PM)
+//		/// </summary>
+//		private void SetEndings()
+//		{
+//			vowels = new char[] { A, E, I, O, U, Y, AE, IU, IA };
+//
+//			perfectiveGerundEndings1 = new char[][] {
+//														new char[]  { V }, new char[]  { V, SH, I }, new char[]  { V, SH, I, S, SOFT }
+//													};
+//
+//			perfectiveGerund1Predessors = new char[][] { 
+//														   new char[]  { A }, new char[]  { IA }
+//													   };
+//
+//			perfectiveGerundEndings2 = new char[][] {
+//														new char[]  { I, V },
+//														new char[]  { Y, V },
+//														new char[]  { I, V, SH, I },
+//														new char[]  { Y, V, SH, I },
+//														new char[]  { I, V, SH, I, S, SOFT },
+//														new char[]  { Y, V, SH, I, S, SOFT }
+//													};
+//
+//			adjectiveEndings = new char[][] {
+//												new char[] { E, E },
+//												new char[] { I, E },
+//												new char[] { Y, E },
+//												new char[] { O, E },
+//												new char[] { E, I_ },
+//												new char[] { I, I_ },
+//												new char[] { Y, I_ },
+//												new char[] { O, I_ },
+//												new char[] { E, M },
+//												new char[] { I, M },
+//												new char[] { Y, M },
+//												new char[] { O, M },
+//												new char[] { I, X },
+//												new char[] { Y, X },
+//												new char[] { U, IU },
+//												new char[] { IU, IU },
+//												new char[] { A, IA },
+//												new char[] { IA, IA },
+//												new char[] { O, IU },
+//												new char[] { E, IU },
+//												new char[] { I, M, I },
+//												new char[]  { Y, M, I },
+//												new char[]  { E, G, O },
+//												new char[]  { O, G, O },
+//												new char[]  { E, M, U },
+//												new char[]  { O, M, U }
+//											};
+//
+//			participleEndings1 = new char[][] {
+//												  new char[]  { SHCH },
+//												  new char[]  { E, M },
+//												  new char[]  { N, N },
+//												  new char[]  { V, SH },
+//												  new char[]  { IU, SHCH }
+//											  };
+//
+//			participleEndings2 = new char[][] {
+//												  new char[]  { I, V, SH },
+//												  new char[]  { Y, V, SH },
+//												  new char[]  { U, IU, SHCH }
+//											  };
+//
+//			participle1Predessors = new char[][] {
+//													 new char[]  { A },
+//													 new char[]  { IA }
+//												 };
+//
+//			reflexiveEndings = new char[][] {
+//												new char[]  { S, IA },
+//												new char[]  { S, SOFT }
+//											};
+//
+//			verbEndings1 = new char[][] {
+//											new char[]  { I_ },
+//											new char[]  { L },
+//											new char[]  { N },
+//											new char[]  { L, O },
+//											new char[]  { N, O },
+//											new char[]  { E, T },
+//											new char[]  { IU, T },
+//											new char[]  { L, A },
+//											new char[]  { N, A },
+//											new char[]  { L, I },
+//											new char[]  { E, M },
+//											new char[]  { N, Y },
+//											new char[]  { E, T, E },
+//											new char[]  { I_, T, E },
+//											new char[]  { T, SOFT },
+//											new char[]  { E, SH, SOFT },
+//											new char[]  { N, N, O }
+//										};
+//
+//			verbEndings2 = new char[][] {
+//											new char[]  { IU },
+//											new char[]  { U, IU },
+//											new char[]  { E, N },
+//											new char[]  { E, I_ },
+//											new char[]  { IA, T },
+//											new char[]  { U, I_ },
+//											new char[]  { I, L },
+//											new char[]  { Y, L },
+//											new char[]  { I, M },
+//											new char[]  { Y, M },
+//											new char[]  { I, T },
+//											new char[]  { Y, T },
+//											new char[]  { I, L, A },
+//											new char[]  { Y, L, A },
+//											new char[]  { E, N, A },
+//											new char[]  { I, T, E },
+//											new char[]  { I, L, I },
+//											new char[]  { Y, L, I },
+//											new char[]  { I, L, O },
+//											new char[]  { Y, L, O },
+//											new char[]  { E, N, O },
+//											new char[]  { U, E, T },
+//											new char[]  { U, IU, T },
+//											new char[]  { E, N, Y },
+//											new char[]  { I, T, SOFT },
+//											new char[]  { Y, T, SOFT },
+//											new char[]  { I, SH, SOFT },
+//											new char[]  { E, I_, T, E },
+//											new char[]  { U, I_, T, E }
+//										};
+//
+//			verb1Predessors = new char[][] {
+//											   new char[]  { A },
+//											   new char[]  { IA }
+//										   };
+//
+//			nounEndings = new char[][] {
+//										   new char[]  { A },
+//										   new char[]  { IU },
+//										   new char[]  { I_ },
+//										   new char[]  { O },
+//										   new char[]  { U },
+//										   new char[]  { E },
+//										   new char[]  { Y },
+//										   new char[]  { I },
+//										   new char[]  { SOFT },
+//										   new char[]  { IA },
+//										   new char[]  { E, V },
+//										   new char[]  { O, V },
+//										   new char[]  { I, E },
+//										   new char[]  { SOFT, E },
+//										   new char[]  { IA, X },
+//										   new char[]  { I, IU },
+//										   new char[]  { E, I },
+//										   new char[]  { I, I },
+//										   new char[]  { E, I_ },
+//										   new char[]  { O, I_ },
+//										   new char[]  { E, M },
+//										   new char[]  { A, M },
+//										   new char[]  { O, M },
+//										   new char[]  { A, X },
+//										   new char[]  { SOFT, IU },
+//										   new char[]  { I, IA },
+//										   new char[]  { SOFT, IA },
+//										   new char[]  { I, I_ },
+//										   new char[]  { IA, M },
+//										   new char[]  { IA, M, I },
+//										   new char[]  { A, M, I },
+//										   new char[]  { I, E, I_ },
+//										   new char[]  { I, IA, M },
+//										   new char[]  { I, E, M },
+//										   new char[]  { I, IA, X },
+//										   new char[]  { I, IA, M, I }
+//									   };
+//
+//			superlativeEndings = new char[][] {
+//												  new char[]  { E, I_, SH },
+//												  new char[]  { E, I_, SH, E }
+//											  };
+//
+//			derivationalEndings = new char[][] {
+//												   new char[]  { O, S, T },
+//												   new char[]  { O, S, T, SOFT }
+//											   };
+//		}
+
+		/// <summary>
+		/// Finds the stem of the given Russian word; a word without an RV region is returned as is.
+		/// Creation date: (16/03/2002 3:36:48 PM)
+		/// </summary>
+		/// <param name="input">Word to stem, written in the stemmer's charset</param>
+		/// <returns>The part before RV followed by what remains of the RV part</returns>
+		public String Stem(String input)
+		{
+			MarkPositions(input);
+			if (RV == 0)
+				return input; // RV wasn't detected, nothing to stem
+
+			// only the RV part of the word is ever modified
+			String prefix = input.Substring(0, RV);
+			StringBuilder zone = new StringBuilder(input.Substring(RV));
+
+			// Step 1: a perfective gerund ending rules out the other step 1 endings
+			if (!PerfectiveGerund(zone))
+			{
+				Reflexive(zone);
+				if (!Adjectival(zone) && !Verb(zone))
+				{
+					Noun(zone);
+				}
+			}
+
+			RemoveI(zone);      // Step 2
+			Derivational(zone); // Step 3
+
+			Superlative(zone);  // Step 4
+			UndoubleN(zone);
+			RemoveSoft(zone);
+			return prefix + zone.ToString();
+		}
+
+		/// <summary>
+		/// Removes a superlative ending from the end of the stemming zone, if one is present.
+		/// Creation date: (17/03/2002 12:14:58 AM)
+		/// </summary>
+		/// <param name="stemmingZone">Buffer that is trimmed in place</param>
+		/// <returns>true when a superlative ending was removed, otherwise false</returns>
+		private bool Superlative(StringBuilder stemmingZone)
+		{
+			return FindAndRemoveEnding(stemmingZone, superlativeEndings);
+		}
+
+		/// <summary>
+		/// Shortens a stemming zone that ends in a doubled N by one character.
+		/// Creation date: (17/03/2002 12:14:58 AM)
+		/// </summary>
+		/// <param name="stemmingZone">Buffer that is trimmed in place</param>
+		/// <returns>true when the zone ended in a doubled N, otherwise false</returns>
+		private bool UndoubleN(StringBuilder stemmingZone)
+		{
+			// one-entry ending class: the letter N twice
+			char[][] doubleN = { new char[] { N, N } };
+			bool endsWithDoubleN = FindEnding(stemmingZone, doubleN) != 0;
+
+			if (endsWithDoubleN)
+			{
+				// two letters matched, but only the last one
+				// is dropped so that a single N remains
+				stemmingZone.Length -= 1;
+			}
+
+			return endsWithDoubleN;
+		}
+
+		/// <summary>
+		/// Removes a verb ending from the end of the stemming zone.
+		/// Creation date: (17/03/2002 12:14:58 AM)
+		/// </summary>
+		/// <param name="stemmingZone">Buffer that is trimmed in place</param>
+		/// <returns>true when an ending of either group was removed, otherwise false</returns>
+		private bool Verb(StringBuilder stemmingZone)
+		{
+			// group 1 endings count only when preceded by one of their predecessors
+			if (FindAndRemoveEnding(stemmingZone, verbEndings1, verb1Predessors))
+				return true;
+			// otherwise fall back to the group 2 endings, which need no predecessor
+			return FindAndRemoveEnding(stemmingZone, verbEndings2);
+		}
+
+		/// <summary>
+		/// Static method for stemming a single word with a caller-supplied charset
+		/// </summary>
+		/// <param name="theWord">Word to stem</param>
+		/// <param name="charset">Charset table handed to a fresh stemmer</param>
+		/// <returns>Result of Stem(String) on that fresh stemmer</returns>
+		public static String Stem(String theWord, char[] charset)
+		{
+			// the charset-taking constructor stores the table exactly as
+			// the parameterless constructor followed by SetCharset would
+			return new RussianStemmer(charset).Stem(theWord);
+		}
+	}
+}

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/WordlistLoader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/WordlistLoader.cs?rev=1230919&r1=1230918&r2=1230919&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/WordlistLoader.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/WordlistLoader.cs Fri Jan 13 08:42:34 2012
@@ -1,105 +1,126 @@
-using System;
-using System.IO;
-using System.Collections;
-using System.Collections.Generic;
-
-namespace Lucene.Net.Analysis
-{
-	/// <summary>
-	/// Loads a text file and adds every line as an entry to a Hashtable. Every line
-	/// should contain only one word. If the file is not found or on any error, an
-	/// empty table is returned.
-	/// </summary>
-	public class WordlistLoader
-	{
-		/// <summary>
-		/// Load words table from the file
-		/// </summary>
-		/// <param name="path">Path to the wordlist</param>
-		/// <param name="wordfile">Name of the wordlist</param>
-		/// <returns></returns>
-        public static ICollection<string> GetWordtable(String path, String wordfile) 
-		{
-			if ( path == null || wordfile == null ) 
-			{
-				return new List<string>();
-			}
-			return GetWordtable(new FileInfo(path + "\\" + wordfile));
-		}
-
-		/// <summary>
-		/// Load words table from the file
-		/// </summary>
-		/// <param name="wordfile">Complete path to the wordlist</param>
-		/// <returns></returns>
-        public static ICollection<string> GetWordtable(String wordfile) 
-		{
-			if ( wordfile == null ) 
-			{
-				return new List<string>();
-			}
-			return GetWordtable( new FileInfo( wordfile ) );
-		}
-
-		/// <summary>
-		/// Load words table from the file 
-		/// </summary>
-		/// <param name="wordfile">File containing the wordlist</param>
-		/// <returns></returns>
-		public static ICollection<string> GetWordtable( FileInfo wordfile ) 
-		{
-			if ( wordfile == null ) 
-			{
-				return new List<string>();
-			}			
-			StreamReader lnr = new StreamReader(wordfile.FullName);
-			return GetWordtable(lnr);
-		}
-
-		/// <summary>
-		/// Reads lines from a Reader and adds every line as an entry to a HashSet (omitting
-		/// leading and trailing whitespace). Every line of the Reader should contain only
-		/// one word. The words need to be in lowercase if you make use of an
-		/// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
-		/// </summary>
-		/// <param name="reader">Reader containing the wordlist</param>
-		/// <returns>A Hashtable with the reader's words</returns>
-		public static ICollection<string> GetWordtable(TextReader reader)
-		{
-			ICollection<string> result = new List<string>();			
-			try 
-			{				
-				List<string> stopWords = new List<string>();
-				String word = null;
-				while ( ( word = reader.ReadLine() ) != null ) 
-				{
-					stopWords.Add(word.Trim());
-				}
-				result = MakeWordTable(stopWords.ToArray(), stopWords.Count);
-			}
-				// On error, use an empty table
-			catch (IOException) 
-			{
-				result = new List<string>();
-			}
-			return result;
-		}
-
-
-		/// <summary>
-		/// Builds the wordlist table.
-		/// </summary>
-		/// <param name="words">Word that where read</param>
-		/// <param name="length">Amount of words that where read into <tt>words</tt></param>
-		/// <returns></returns>
-		private static ICollection<string> MakeWordTable( String[] words, int length ) 
-		{
-			List<string> table = new List<string>( length );
-			for ( int i = 0; i < length; i++ ) 
-			{
-				table.Add(words[i]);
-			}
-			return table;
-		}
-	}
-}
\ No newline at end of file
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.IO;
+using System.Collections;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis
+{
+	/// <summary>
+	/// Loads a text file and returns every line as an entry of a string collection. Every
+	/// line should contain only one word. An IOException raised while reading lines yields
+	/// an empty collection; a failure to open the file is not caught and reaches the caller.
+	/// </summary>
+	public class WordlistLoader
+	{
+		/// <summary>
+		/// Load words table from the file found by joining path and wordfile with Path.Combine
+		/// </summary>
+		/// <param name="path">Path to the wordlist</param>
+		/// <param name="wordfile">Name of the wordlist</param>
+		/// <returns>The words of the file, or an empty collection if either argument is null</returns>
+		public static ICollection<string> GetWordtable(String path, String wordfile)
+		{
+			if ( path == null || wordfile == null )
+			{
+				return new List<string>();
+			}
+			return GetWordtable(new FileInfo(Path.Combine(path, wordfile)));
+		}
+
+		/// <summary>
+		/// Load words table from the file
+		/// </summary>
+		/// <param name="wordfile">Complete path to the wordlist</param>
+		/// <returns>The words of the file, or an empty collection if the argument is null</returns>
+		public static ICollection<string> GetWordtable(String wordfile)
+		{
+			if ( wordfile == null )
+			{
+				return new List<string>();
+			}
+			return GetWordtable( new FileInfo( wordfile ) );
+		}
+
+		/// <summary>
+		/// Load words table from the file; the reader opened here is disposed before returning
+		/// </summary>
+		/// <param name="wordfile">File containing the wordlist</param>
+		/// <returns>The words of the file, or an empty collection if the argument is null</returns>
+		public static ICollection<string> GetWordtable( FileInfo wordfile )
+		{
+			if ( wordfile == null )
+			{
+				return new List<string>();
+			}
+			using (StreamReader lnr = new StreamReader(wordfile.FullName))
+				return GetWordtable(lnr); // the list is fully built before the reader is closed
+		}
+
+		/// <summary>
+		/// Reads lines from a Reader and adds every line as an entry to a list (omitting
+		/// leading and trailing whitespace). Every line of the Reader should contain only
+		/// one word. The words need to be in lowercase if you make use of an
+		/// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+		/// </summary>
+		/// <param name="reader">Reader containing the wordlist; it is not closed by this method</param>
+		/// <returns>A collection with the reader's words (empty if reading raised an IOException)</returns>
+		public static ICollection<string> GetWordtable(TextReader reader)
+		{
+			ICollection<string> result = new List<string>();
+			try
+			{
+				List<string> stopWords = new List<string>();
+				String word = null;
+				while ( ( word = reader.ReadLine() ) != null )
+				{
+					stopWords.Add(word.Trim());
+				}
+				result = MakeWordTable(stopWords.ToArray(), stopWords.Count);
+			}
+			// On error, use an empty table
+			catch (IOException)
+			{
+				result = new List<string>();
+			}
+			return result;
+		}
+
+
+		/// <summary>
+		/// Builds the wordlist table.
+		/// </summary>
+		/// <param name="words">Words that were read</param>
+		/// <param name="length">Number of words that were read into <c>words</c></param>
+		/// <returns>A new list holding the first <c>length</c> entries of <c>words</c></returns>
+		private static ICollection<string> MakeWordTable( String[] words, int length )
+		{
+			List<string> table = new List<string>( length );
+			for ( int i = 0; i < length; i++ )
+			{
+				table.Add(words[i]);
+			}
+			return table;
+		}
+	}
+}



Mime
View raw message