lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [03/52] [abbrv] lucenenet git commit: Ported Analysis.Hunspell + tests
Date Thu, 01 Sep 2016 14:39:24 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs
index bf59a70..298f481 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs
@@ -1,9 +1,10 @@
-using System.Collections.Generic;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using System.Collections.Generic;
 
-namespace org.apache.lucene.analysis.hunspell
+namespace Lucene.Net.Analysis.Hunspell
 {
-
-	/*
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -20,152 +21,149 @@ namespace org.apache.lucene.analysis.hunspell
 	 * limitations under the License.
 	 */
 
-
-	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-	using CharsRef = org.apache.lucene.util.CharsRef;
-
-	/// <summary>
-	/// TokenFilter that uses hunspell affix rules and words to stem tokens.  Since hunspell supports a word having multiple
-	/// stems, this filter can emit multiple tokens for each consumed token
-	/// 
-	/// <para>
-	/// Note: This filter is aware of the <seealso cref="KeywordAttribute"/>. To prevent
-	/// certain terms from being passed to the stemmer
-	/// <seealso cref="KeywordAttribute#isKeyword()"/> should be set to <code>true</code>
-	/// in a previous <seealso cref="TokenStream"/>.
-	/// 
-	/// Note: For including the original term as well as the stemmed version, see
-	/// <seealso cref="org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory"/>
-	/// </para>
-	/// 
-	/// @lucene.experimental
-	/// </summary>
-	public sealed class HunspellStemFilter : TokenFilter
-	{
-
-	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
-	  private readonly PositionIncrementAttribute posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
-	  private readonly KeywordAttribute keywordAtt = addAttribute(typeof(KeywordAttribute));
-	  private readonly Stemmer stemmer;
-
-	  private IList<CharsRef> buffer;
-	  private State savedState;
-
-	  private readonly bool dedup;
-	  private readonly bool longestOnly;
-
-	  /// <summary>
-	  /// Create a <seealso cref="HunspellStemFilter"/> outputting all possible stems. </summary>
-	  ///  <seealso cref= #HunspellStemFilter(TokenStream, Dictionary, boolean)  </seealso>
-	  public HunspellStemFilter(TokenStream input, Dictionary dictionary) : this(input, dictionary, true)
-	  {
-	  }
-
-	  /// <summary>
-	  /// Create a <seealso cref="HunspellStemFilter"/> outputting all possible stems. </summary>
-	  ///  <seealso cref= #HunspellStemFilter(TokenStream, Dictionary, boolean, boolean)  </seealso>
-	  public HunspellStemFilter(TokenStream input, Dictionary dictionary, bool dedup) : this(input, dictionary, dedup, false)
-	  {
-	  }
-
-	  /// <summary>
-	  /// Creates a new HunspellStemFilter that will stem tokens from the given TokenStream using affix rules in the provided
-	  /// Dictionary
-	  /// </summary>
-	  /// <param name="input"> TokenStream whose tokens will be stemmed </param>
-	  /// <param name="dictionary"> HunspellDictionary containing the affix rules and words that will be used to stem the tokens </param>
-	  /// <param name="longestOnly"> true if only the longest term should be output. </param>
-	  public HunspellStemFilter(TokenStream input, Dictionary dictionary, bool dedup, bool longestOnly) : base(input)
-	  {
-		this.dedup = dedup && longestOnly == false; // don't waste time deduping if longestOnly is set
-		this.stemmer = new Stemmer(dictionary);
-		this.longestOnly = longestOnly;
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
-	  public override bool incrementToken()
-	  {
-		if (buffer != null && buffer.Count > 0)
-		{
-		  CharsRef nextStem = buffer.Remove(0);
-		  restoreState(savedState);
-		  posIncAtt.PositionIncrement = 0;
-		  termAtt.setEmpty().append(nextStem);
-		  return true;
-		}
-
-		if (!input.incrementToken())
-		{
-		  return false;
-		}
-
-		if (keywordAtt.Keyword)
-		{
-		  return true;
-		}
-
-		buffer = dedup ? stemmer.uniqueStems(termAtt.buffer(), termAtt.length()) : stemmer.stem(termAtt.buffer(), termAtt.length());
-
-		if (buffer.Count == 0) // we do not know this word, return it unchanged
-		{
-		  return true;
-		}
-
-		if (longestOnly && buffer.Count > 1)
-		{
-		  buffer.Sort(lengthComparator);
-		}
-
-		CharsRef stem = buffer.Remove(0);
-		termAtt.setEmpty().append(stem);
-
-		if (longestOnly)
-		{
-		  buffer.Clear();
-		}
-		else
-		{
-		  if (buffer.Count > 0)
-		  {
-			savedState = captureState();
-		  }
-		}
-
-		return true;
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
-	  public override void reset()
-	  {
-		base.reset();
-		buffer = null;
-	  }
-
-	  internal static readonly IComparer<CharsRef> lengthComparator = new ComparatorAnonymousInnerClassHelper();
-
-	  private class ComparatorAnonymousInnerClassHelper : IComparer<CharsRef>
-	  {
-		  public ComparatorAnonymousInnerClassHelper()
-		  {
-		  }
-
-		  public virtual int Compare(CharsRef o1, CharsRef o2)
-		  {
-			if (o2.length == o1.length)
-			{
-			  // tie break on text
-			  return o2.compareTo(o1);
-			}
-			else
-			{
-			  return o2.length < o1.length ? - 1 : 1;
-			}
-		  }
-	  }
-	}
-
+    /// <summary>
+    /// TokenFilter that uses hunspell affix rules and words to stem tokens.  Since hunspell supports a word having multiple
+    /// stems, this filter can emit multiple tokens for each consumed token
+    /// 
+    /// <para>
+    /// Note: This filter is aware of the <seealso cref="KeywordAttribute"/>. To prevent
+    /// certain terms from being passed to the stemmer
+    /// <seealso cref="KeywordAttribute#isKeyword()"/> should be set to <code>true</code>
+    /// in a previous <seealso cref="TokenStream"/>.
+    /// 
+    /// Note: For including the original term as well as the stemmed version, see
+    /// <seealso cref="org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory"/>
+    /// </para>
+    /// 
+    /// @lucene.experimental
+    /// </summary>
+    public sealed class HunspellStemFilter : TokenFilter
+    {
+
+        private readonly ICharTermAttribute termAtt;
+        private readonly IPositionIncrementAttribute posIncAtt;
+        private readonly IKeywordAttribute keywordAtt;
+        private readonly Stemmer stemmer;
+
+        private List<CharsRef> buffer;
+        private State savedState;
+
+        private readonly bool dedup;
+        private readonly bool longestOnly;
+
+        /// <summary>
+        /// Create a <seealso cref="HunspellStemFilter"/> outputting all possible stems. </summary>
+        ///  <seealso cref= #HunspellStemFilter(TokenStream, Dictionary, boolean)  </seealso>
+        public HunspellStemFilter(TokenStream input, Dictionary dictionary)
+              : this(input, dictionary, true)
+        {
+        }
+
+        /// <summary>
+        /// Create a <seealso cref="HunspellStemFilter"/> outputting all possible stems. </summary>
+        ///  <seealso cref= #HunspellStemFilter(TokenStream, Dictionary, boolean, boolean)  </seealso>
+        public HunspellStemFilter(TokenStream input, Dictionary dictionary, bool dedup)
+              : this(input, dictionary, dedup, false)
+        {
+        }
+
+        /// <summary>
+        /// Creates a new HunspellStemFilter that will stem tokens from the given TokenStream using affix rules in the provided
+        /// Dictionary
+        /// </summary>
+        /// <param name="input"> TokenStream whose tokens will be stemmed </param>
+        /// <param name="dictionary"> HunspellDictionary containing the affix rules and words that will be used to stem the tokens </param>
+        /// <param name="longestOnly"> true if only the longest term should be output. </param>
+        public HunspellStemFilter(TokenStream input, Dictionary dictionary, bool dedup, bool longestOnly) :
+              base(input)
+        {
+            this.dedup = dedup && longestOnly == false; // don't waste time deduping if longestOnly is set
+            this.stemmer = new Stemmer(dictionary);
+            this.longestOnly = longestOnly;
+            termAtt = AddAttribute<ICharTermAttribute>();
+            posIncAtt = AddAttribute<IPositionIncrementAttribute>();
+            keywordAtt = AddAttribute<IKeywordAttribute>();
+        }
+
+        public override bool IncrementToken()
+        {
+            if (buffer != null && buffer.Count > 0)
+            {
+                CharsRef nextStem = buffer[0];
+                buffer.RemoveAt(0);
+                RestoreState(savedState);
+                posIncAtt.PositionIncrement = 0;
+                termAtt.SetEmpty().Append(nextStem);
+                return true;
+            }
+
+            if (!input.IncrementToken())
+            {
+                return false;
+            }
+
+            if (keywordAtt.Keyword)
+            {
+                return true;
+            }
+
+            buffer = new List<CharsRef>(dedup ? stemmer.UniqueStems(termAtt.Buffer(), termAtt.Length) : stemmer.Stem(termAtt.Buffer(), termAtt.Length));
+
+            if (buffer.Count == 0) // we do not know this word, return it unchanged
+            {
+                return true;
+            }
+
+            if (longestOnly && buffer.Count > 1)
+            {
+                buffer.Sort(lengthComparator);
+            }
+
+            CharsRef stem = buffer[0];
+            buffer.RemoveAt(0);
+            termAtt.SetEmpty().Append(stem);
+
+            if (longestOnly)
+            {
+                buffer.Clear();
+            }
+            else
+            {
+                if (buffer.Count > 0)
+                {
+                    savedState = CaptureState();
+                }
+            }
+
+            return true;
+        }
+
+        public override void Reset()
+        {
+            base.Reset();
+            buffer = null;
+        }
+
+        internal static readonly IComparer<CharsRef> lengthComparator = new ComparatorAnonymousInnerClassHelper();
+
+        private class ComparatorAnonymousInnerClassHelper : IComparer<CharsRef>
+        {
+            public ComparatorAnonymousInnerClassHelper()
+            {
+            }
+
+            public virtual int Compare(CharsRef o1, CharsRef o2)
+            {
+                if (o2.Length == o1.Length)
+                {
+                    // tie break on text
+                    return o2.CompareTo(o1);
+                }
+                else
+                {
+                    return o2.Length < o1.Length ? -1 : 1;
+                }
+            }
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilterFactory.cs
index c9888fd..4d720f4 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilterFactory.cs
@@ -1,9 +1,12 @@
-using System.Collections.Generic;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.IO;
 
-namespace org.apache.lucene.analysis.hunspell
+namespace Lucene.Net.Analysis.Hunspell
 {
-
-	/*
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -20,97 +23,88 @@ namespace org.apache.lucene.analysis.hunspell
 	 * limitations under the License.
 	 */
 
+    /// <summary>
+    /// TokenFilterFactory that creates instances of <seealso cref="HunspellStemFilter"/>.
+    /// Example config for British English:
+    /// <pre class="prettyprint">
+    /// &lt;filter class=&quot;solr.HunspellStemFilterFactory&quot;
+    ///         dictionary=&quot;en_GB.dic,my_custom.dic&quot;
+    ///         affix=&quot;en_GB.aff&quot; 
+    ///         ignoreCase=&quot;false&quot;
+    ///         longestOnly=&quot;false&quot; /&gt;</pre>
+    /// Both parameters dictionary and affix are mandatory.
+    /// Dictionaries for many languages are available through the OpenOffice project.
+    /// 
+    /// See <a href="http://wiki.apache.org/solr/Hunspell">http://wiki.apache.org/solr/Hunspell</a>
+    /// @lucene.experimental
+    /// </summary>
+    public class HunspellStemFilterFactory : TokenFilterFactory, IResourceLoaderAware
+    {
+        private const string PARAM_DICTIONARY = "dictionary";
+        private const string PARAM_AFFIX = "affix";
+        private const string PARAM_RECURSION_CAP = "recursionCap";
+        private const string PARAM_IGNORE_CASE = "ignoreCase";
+        private const string PARAM_LONGEST_ONLY = "longestOnly";
 
-	using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
-	using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
-	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
-	using IOUtils = org.apache.lucene.util.IOUtils;
-
-	/// <summary>
-	/// TokenFilterFactory that creates instances of <seealso cref="HunspellStemFilter"/>.
-	/// Example config for British English:
-	/// <pre class="prettyprint">
-	/// &lt;filter class=&quot;solr.HunspellStemFilterFactory&quot;
-	///         dictionary=&quot;en_GB.dic,my_custom.dic&quot;
-	///         affix=&quot;en_GB.aff&quot; 
-	///         ignoreCase=&quot;false&quot;
-	///         longestOnly=&quot;false&quot; /&gt;</pre>
-	/// Both parameters dictionary and affix are mandatory.
-	/// Dictionaries for many languages are available through the OpenOffice project.
-	/// 
-	/// See <a href="http://wiki.apache.org/solr/Hunspell">http://wiki.apache.org/solr/Hunspell</a>
-	/// @lucene.experimental
-	/// </summary>
-	public class HunspellStemFilterFactory : TokenFilterFactory, ResourceLoaderAware
-	{
-	  private const string PARAM_DICTIONARY = "dictionary";
-	  private const string PARAM_AFFIX = "affix";
-	  private const string PARAM_RECURSION_CAP = "recursionCap";
-	  private const string PARAM_IGNORE_CASE = "ignoreCase";
-	  private const string PARAM_LONGEST_ONLY = "longestOnly";
-
-	  private readonly string dictionaryFiles;
-	  private readonly string affixFile;
-	  private readonly bool ignoreCase;
-	  private readonly bool longestOnly;
-	  private Dictionary dictionary;
-
-	  /// <summary>
-	  /// Creates a new HunspellStemFilterFactory </summary>
-	  public HunspellStemFilterFactory(IDictionary<string, string> args) : base(args)
-	  {
-		dictionaryFiles = require(args, PARAM_DICTIONARY);
-		affixFile = get(args, PARAM_AFFIX);
-		ignoreCase = getBoolean(args, PARAM_IGNORE_CASE, false);
-		longestOnly = getBoolean(args, PARAM_LONGEST_ONLY, false);
-		// this isnt necessary: we properly load all dictionaries.
-		// but recognize and ignore for back compat
-		getBoolean(args, "strictAffixParsing", true);
-		// this isn't necessary: multi-stage stripping is fixed and 
-		// flags like COMPLEXPREFIXES in the data itself control this.
-		// but recognize and ignore for back compat
-		getInt(args, "recursionCap", 0);
-		if (args.Count > 0)
-		{
-		  throw new System.ArgumentException("Unknown parameters: " + args);
-		}
-	  }
+        private readonly string dictionaryFiles;
+        private readonly string affixFile;
+        private readonly bool ignoreCase;
+        private readonly bool longestOnly;
+        private Dictionary dictionary;
 
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
-	  public virtual void inform(ResourceLoader loader)
-	  {
-		string[] dicts = dictionaryFiles.Split(",", true);
+        /// <summary>
+        /// Creates a new HunspellStemFilterFactory </summary>
+        public HunspellStemFilterFactory(IDictionary<string, string> args) : base(args)
+        {
+            dictionaryFiles = Require(args, PARAM_DICTIONARY);
+            affixFile = Get(args, PARAM_AFFIX);
+            ignoreCase = GetBoolean(args, PARAM_IGNORE_CASE, false);
+            longestOnly = GetBoolean(args, PARAM_LONGEST_ONLY, false);
+            // this isnt necessary: we properly load all dictionaries.
+            // but recognize and ignore for back compat
+            GetBoolean(args, "strictAffixParsing", true);
+            // this isn't necessary: multi-stage stripping is fixed and 
+            // flags like COMPLEXPREFIXES in the data itself control this.
+            // but recognize and ignore for back compat
+            GetInt(args, "recursionCap", 0);
+            if (args.Count > 0)
+            {
+                throw new System.ArgumentException("Unknown parameters: " + args);
+            }
+        }
 
-		InputStream affix = null;
-		IList<InputStream> dictionaries = new List<InputStream>();
+        public virtual void Inform(IResourceLoader loader)
+        {
+            string[] dicts = dictionaryFiles.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
 
-		try
-		{
-		  dictionaries = new List<>();
-		  foreach (string file in dicts)
-		  {
-			dictionaries.Add(loader.openResource(file));
-		  }
-		  affix = loader.openResource(affixFile);
+            Stream affix = null;
+            IList<Stream> dictionaries = new List<Stream>();
 
-		  this.dictionary = new Dictionary(affix, dictionaries, ignoreCase);
-		}
-		catch (ParseException e)
-		{
-		  throw new IOException("Unable to load hunspell data! [dictionary=" + dictionaries + ",affix=" + affixFile + "]", e);
-		}
-		finally
-		{
-		  IOUtils.closeWhileHandlingException(affix);
-		  IOUtils.closeWhileHandlingException(dictionaries);
-		}
-	  }
+            try
+            {
+                dictionaries = new List<Stream>();
+                foreach (string file in dicts)
+                {
+                    dictionaries.Add(loader.OpenResource(file));
+                }
+                affix = loader.OpenResource(affixFile);
 
-	  public override TokenStream create(TokenStream tokenStream)
-	  {
-		return new HunspellStemFilter(tokenStream, dictionary, true, longestOnly);
-	  }
-	}
+                this.dictionary = new Dictionary(affix, dictionaries, ignoreCase);
+            }
+            catch (Exception e)
+            {
+                throw new IOException("Unable to load hunspell data! [dictionary=" + dictionaries + ",affix=" + affixFile + "]", e);
+            }
+            finally
+            {
+                IOUtils.CloseWhileHandlingException(affix);
+                IOUtils.CloseWhileHandlingException(dictionaries);
+            }
+        }
 
+        public override TokenStream Create(TokenStream tokenStream)
+        {
+            return new HunspellStemFilter(tokenStream, dictionary, true, longestOnly);
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs
index 87872c9..597d6ec 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs
@@ -1,7 +1,9 @@
-namespace org.apache.lucene.analysis.hunspell
-{
+using System;
+using System.Text;
 
-	/*
+namespace Lucene.Net.Analysis.Hunspell
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,30 +20,111 @@
 	 * limitations under the License.
 	 */
 
+    // LUCENENET NOTE: This class was refactored from its Java counterpart.
+
+    // many hunspell dictionaries use this encoding, yet java/.NET does not have it?!?!
+    internal sealed class ISO8859_14Encoding : Encoding
+    {
+        private static readonly Decoder decoder = new ISO8859_14Decoder();
+        public override Decoder GetDecoder()
+        {
+            return new ISO8859_14Decoder();
+        }
+
+        public override string EncodingName
+        {
+            get
+            {
+                return "iso-8859-14";
+            }
+        }
+
+        public override int CodePage
+        {
+            get
+            {
+                return 28604;
+            }
+        }
+
+        public override int GetCharCount(byte[] bytes, int index, int count)
+        {
+            return decoder.GetCharCount(bytes, index, count);
+        }
+
+        public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
+        {
+            return decoder.GetChars(bytes, byteIndex, byteCount, chars, charIndex);
+        }
+
+        public override int GetMaxCharCount(int byteCount)
+        {
+            return byteCount;
+        }
+
+
+        #region Encoding Not Implemented
+        public override int GetByteCount(char[] chars, int index, int count)
+        {
+            throw new NotImplementedException();
+        }
+
+        public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
+        {
+            throw new NotImplementedException();
+        }
+
+        public override int GetMaxByteCount(int charCount)
+        {
+            throw new NotImplementedException();
+        }
+
+        #endregion
+    }
 
-	// many hunspell dictionaries use this encoding, yet java does not have it?!?!
-	internal sealed class ISO8859_14Decoder : CharsetDecoder
-	{
+    internal sealed class ISO8859_14Decoder : Decoder
+    {
+        internal static readonly char[] TABLE = new char[]
+        {
+            (char)0x00A0, (char)0x1E02, (char)0x1E03, (char)0x00A3, (char)0x010A, (char)0x010B, (char)0x1E0A, (char)0x00A7,
+            (char)0x1E80, (char)0x00A9, (char)0x1E82, (char)0x1E0B, (char)0x1EF2, (char)0x00AD, (char)0x00AE, (char)0x0178,
+            (char)0x1E1E, (char)0x1E1F, (char)0x0120, (char)0x0121, (char)0x1E40, (char)0x1E41, (char)0x00B6, (char)0x1E56,
+            (char)0x1E81, (char)0x1E57, (char)0x1E83, (char)0x1E60, (char)0x1EF3, (char)0x1E84, (char)0x1E85, (char)0x1E61,
+            (char)0x00C0, (char)0x00C1, (char)0x00C2, (char)0x00C3, (char)0x00C4, (char)0x00C5, (char)0x00C6, (char)0x00C7,
+            (char)0x00C8, (char)0x00C9, (char)0x00CA, (char)0x00CB, (char)0x00CC, (char)0x00CD, (char)0x00CE, (char)0x00CF,
+            (char)0x0174, (char)0x00D1, (char)0x00D2, (char)0x00D3, (char)0x00D4, (char)0x00D5, (char)0x00D6, (char)0x1E6A,
+            (char)0x00D8, (char)0x00D9, (char)0x00DA, (char)0x00DB, (char)0x00DC, (char)0x00DD, (char)0x0176, (char)0x00DF,
+            (char)0x00E0, (char)0x00E1, (char)0x00E2, (char)0x00E3, (char)0x00E4, (char)0x00E5, (char)0x00E6, (char)0x00E7,
+            (char)0x00E8, (char)0x00E9, (char)0x00EA, (char)0x00EB, (char)0x00EC, (char)0x00ED, (char)0x00EE, (char)0x00EF,
+            (char)0x0175, (char)0x00F1, (char)0x00F2, (char)0x00F3, (char)0x00F4, (char)0x00F5, (char)0x00F6, (char)0x1E6B,
+            (char)0x00F8, (char)0x00F9, (char)0x00FA, (char)0x00FB, (char)0x00FC, (char)0x00FD, (char)0x0177, (char)0x00FF
+        };
 
-	  internal static readonly char[] TABLE = new char[] {0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7, 0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178, 0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56, 0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61, 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF, 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF};
+        public override int GetCharCount(byte[] bytes, int index, int count)
+        {
+            return count;
+        }
 
-	  internal ISO8859_14Decoder() : base(StandardCharsets.ISO_88591, 1f, 1f); / / fake with similar properties
-	  {
-	  }
+        public override int GetChars(byte[] bytesIn, int byteIndex, int byteCount, char[] charsOut, int charIndex)
+        {
+            int writeCount = 0;
+            int charPointer = charIndex;
 
-	  protected internal override CoderResult decodeLoop(ByteBuffer @in, CharBuffer @out)
-	  {
-		while (@in.hasRemaining() && @out.hasRemaining())
-		{
-		  char ch = (char)(@in.get() & 0xff);
-		  if (ch >= 0xA0)
-		  {
-			ch = TABLE[ch - 0xA0];
-		  }
-		  @out.put(ch);
-		}
-		return @in.hasRemaining() ? CoderResult.OVERFLOW : CoderResult.UNDERFLOW;
-	  }
-	}
+            for (int i = byteIndex; i <= (byteIndex + byteCount); i++)
+            {
+                // Decode the value
+                char ch = (char)(bytesIn[i] & 0xff);
+                if (ch >= 0xA0)
+                {
+                    ch = TABLE[ch - 0xA0];
+                }
+                // write the value to the correct buffer slot
+                charsOut[charPointer] = ch;
+                writeCount++;
+                charPointer++;
+            }
 
+            return writeCount;
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs
index 70b4a94..8e0070d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs
@@ -1,12 +1,17 @@
-using System;
-using System.Diagnostics;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Automaton;
+using System;
 using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
 using System.Text;
 
-namespace org.apache.lucene.analysis.hunspell
+namespace Lucene.Net.Analysis.Hunspell
 {
-
-	/*
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -23,453 +28,438 @@ namespace org.apache.lucene.analysis.hunspell
 	 * limitations under the License.
 	 */
 
-
-	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
-	using ByteArrayDataInput = org.apache.lucene.store.ByteArrayDataInput;
-	using ArrayUtil = org.apache.lucene.util.ArrayUtil;
-	using BytesRef = org.apache.lucene.util.BytesRef;
-	using CharsRef = org.apache.lucene.util.CharsRef;
-	using IntsRef = org.apache.lucene.util.IntsRef;
-	using Version = org.apache.lucene.util.Version;
-	using CharacterRunAutomaton = org.apache.lucene.util.automaton.CharacterRunAutomaton;
-
-	/// <summary>
-	/// Stemmer uses the affix rules declared in the Dictionary to generate one or more stems for a word.  It
-	/// conforms to the algorithm in the original hunspell algorithm, including recursive suffix stripping.
-	/// </summary>
-	internal sealed class Stemmer
-	{
-	  private readonly Dictionary dictionary;
-	  private readonly BytesRef scratch = new BytesRef();
-	  private readonly StringBuilder segment = new StringBuilder();
-	  private readonly ByteArrayDataInput affixReader;
-
-	  // used for normalization
-	  private readonly StringBuilder scratchSegment = new StringBuilder();
-	  private char[] scratchBuffer = new char[32];
-
-	  /// <summary>
-	  /// Constructs a new Stemmer which will use the provided Dictionary to create its stems.
-	  /// </summary>
-	  /// <param name="dictionary"> Dictionary that will be used to create the stems </param>
-	  public Stemmer(Dictionary dictionary)
-	  {
-		this.dictionary = dictionary;
-		this.affixReader = new ByteArrayDataInput(dictionary.affixData);
-	  }
-
-	  /// <summary>
-	  /// Find the stem(s) of the provided word.
-	  /// </summary>
-	  /// <param name="word"> Word to find the stems for </param>
-	  /// <returns> List of stems for the word </returns>
-	  public IList<CharsRef> stem(string word)
-	  {
-		return stem(word.ToCharArray(), word.Length);
-	  }
-
-	  /// <summary>
-	  /// Find the stem(s) of the provided word
-	  /// </summary>
-	  /// <param name="word"> Word to find the stems for </param>
-	  /// <returns> List of stems for the word </returns>
-	  public IList<CharsRef> stem(char[] word, int length)
-	  {
-
-		if (dictionary.needsInputCleaning)
-		{
-		  scratchSegment.Length = 0;
-		  scratchSegment.Append(word, 0, length);
-		  CharSequence cleaned = dictionary.cleanInput(scratchSegment, segment);
-		  scratchBuffer = ArrayUtil.grow(scratchBuffer, cleaned.length());
-		  length = segment.Length;
-		  segment.getChars(0, length, scratchBuffer, 0);
-		  word = scratchBuffer;
-		}
-
-		IList<CharsRef> stems = new List<CharsRef>();
-		IntsRef forms = dictionary.lookupWord(word, 0, length);
-		if (forms != null)
-		{
-		  // TODO: some forms should not be added, e.g. ONLYINCOMPOUND
-		  // just because it exists, does not make it valid...
-		  for (int i = 0; i < forms.length; i++)
-		  {
-			stems.Add(newStem(word, length));
-		  }
-		}
-		stems.AddRange(stem(word, length, -1, -1, -1, 0, true, true, false, false));
-		return stems;
-	  }
-
-	  /// <summary>
-	  /// Find the unique stem(s) of the provided word
-	  /// </summary>
-	  /// <param name="word"> Word to find the stems for </param>
-	  /// <returns> List of stems for the word </returns>
-	  public IList<CharsRef> uniqueStems(char[] word, int length)
-	  {
-		IList<CharsRef> stems = stem(word, length);
-		if (stems.Count < 2)
-		{
-		  return stems;
-		}
-		CharArraySet terms = new CharArraySet(Version.LUCENE_CURRENT, 8, dictionary.ignoreCase);
-		IList<CharsRef> deduped = new List<CharsRef>();
-		foreach (CharsRef s in stems)
-		{
-		  if (!terms.contains(s))
-		  {
-			deduped.Add(s);
-			terms.add(s);
-		  }
-		}
-		return deduped;
-	  }
-
-	  private CharsRef newStem(char[] buffer, int length)
-	  {
-		if (dictionary.needsOutputCleaning)
-		{
-		  scratchSegment.Length = 0;
-		  scratchSegment.Append(buffer, 0, length);
-		  try
-		  {
-			Dictionary.applyMappings(dictionary.oconv, scratchSegment);
-		  }
-		  catch (IOException bogus)
-		  {
-			throw new Exception(bogus);
-		  }
-		  char[] cleaned = new char[scratchSegment.Length];
-		  scratchSegment.getChars(0, cleaned.Length, cleaned, 0);
-		  return new CharsRef(cleaned, 0, cleaned.Length);
-		}
-		else
-		{
-		  return new CharsRef(buffer, 0, length);
-		}
-	  }
-
-	  // ================================================= Helper Methods ================================================
-
-	  /// <summary>
-	  /// Generates a list of stems for the provided word
-	  /// </summary>
-	  /// <param name="word"> Word to generate the stems for </param>
-	  /// <param name="previous"> previous affix that was removed (so we dont remove same one twice) </param>
-	  /// <param name="prevFlag"> Flag from a previous stemming step that need to be cross-checked with any affixes in this recursive step </param>
-	  /// <param name="prefixFlag"> flag of the most inner removed prefix, so that when removing a suffix, its also checked against the word </param>
-	  /// <param name="recursionDepth"> current recursiondepth </param>
-	  /// <param name="doPrefix"> true if we should remove prefixes </param>
-	  /// <param name="doSuffix"> true if we should remove suffixes </param>
-	  /// <param name="previousWasPrefix"> true if the previous removal was a prefix:
-	  ///        if we are removing a suffix, and it has no continuation requirements, its ok.
-	  ///        but two prefixes (COMPLEXPREFIXES) or two suffixes must have continuation requirements to recurse. </param>
-	  /// <param name="circumfix"> true if the previous prefix removal was signed as a circumfix
-	  ///        this means inner most suffix must also contain circumfix flag. </param>
-	  /// <returns> List of stems, or empty list if no stems are found </returns>
-	  private IList<CharsRef> stem(char[] word, int length, int previous, int prevFlag, int prefixFlag, int recursionDepth, bool doPrefix, bool doSuffix, bool previousWasPrefix, bool circumfix)
-	  {
-
-		// TODO: allow this stuff to be reused by tokenfilter
-		IList<CharsRef> stems = new List<CharsRef>();
-
-		if (doPrefix && dictionary.prefixes != null)
-		{
-		  for (int i = length - 1; i >= 0; i--)
-		  {
-			IntsRef prefixes = dictionary.lookupPrefix(word, 0, i);
-			if (prefixes == null)
-			{
-			  continue;
-			}
-
-			for (int j = 0; j < prefixes.length; j++)
-			{
-			  int prefix = prefixes.ints[prefixes.offset + j];
-			  if (prefix == previous)
-			  {
-				continue;
-			  }
-			  affixReader.Position = 8 * prefix;
-			  char flag = (char)(affixReader.readShort() & 0xffff);
-			  char stripOrd = (char)(affixReader.readShort() & 0xffff);
-			  int condition = (char)(affixReader.readShort() & 0xffff);
-			  bool crossProduct = (condition & 1) == 1;
-			  condition = (int)((uint)condition >> 1);
-			  char append = (char)(affixReader.readShort() & 0xffff);
-
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final boolean compatible;
-			  bool compatible;
-			  if (recursionDepth == 0)
-			  {
-				compatible = true;
-			  }
-			  else if (crossProduct)
-			  {
-				// cross check incoming continuation class (flag of previous affix) against list.
-				dictionary.flagLookup.get(append, scratch);
-				char[] appendFlags = Dictionary.decodeFlags(scratch);
-				Debug.Assert(prevFlag >= 0);
-				compatible = hasCrossCheckedFlag((char)prevFlag, appendFlags, false);
-			  }
-			  else
-			  {
-				compatible = false;
-			  }
-
-			  if (compatible)
-			  {
-				int deAffixedStart = i;
-				int deAffixedLength = length - deAffixedStart;
-
-				int stripStart = dictionary.stripOffsets[stripOrd];
-				int stripEnd = dictionary.stripOffsets[stripOrd + 1];
-				int stripLength = stripEnd - stripStart;
-
-				if (!checkCondition(condition, dictionary.stripData, stripStart, stripLength, word, deAffixedStart, deAffixedLength))
-				{
-				  continue;
-				}
-
-				char[] strippedWord = new char[stripLength + deAffixedLength];
-				Array.Copy(dictionary.stripData, stripStart, strippedWord, 0, stripLength);
-				Array.Copy(word, deAffixedStart, strippedWord, stripLength, deAffixedLength);
-
-				IList<CharsRef> stemList = applyAffix(strippedWord, strippedWord.Length, prefix, -1, recursionDepth, true, circumfix);
-
-				stems.AddRange(stemList);
-			  }
-			}
-		  }
-		}
-
-		if (doSuffix && dictionary.suffixes != null)
-		{
-		  for (int i = 0; i < length; i++)
-		  {
-			IntsRef suffixes = dictionary.lookupSuffix(word, i, length - i);
-			if (suffixes == null)
-			{
-			  continue;
-			}
-
-			for (int j = 0; j < suffixes.length; j++)
-			{
-			  int suffix = suffixes.ints[suffixes.offset + j];
-			  if (suffix == previous)
-			  {
-				continue;
-			  }
-			  affixReader.Position = 8 * suffix;
-			  char flag = (char)(affixReader.readShort() & 0xffff);
-			  char stripOrd = (char)(affixReader.readShort() & 0xffff);
-			  int condition = (char)(affixReader.readShort() & 0xffff);
-			  bool crossProduct = (condition & 1) == 1;
-			  condition = (int)((uint)condition >> 1);
-			  char append = (char)(affixReader.readShort() & 0xffff);
-
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final boolean compatible;
-			  bool compatible;
-			  if (recursionDepth == 0)
-			  {
-				compatible = true;
-			  }
-			  else if (crossProduct)
-			  {
-				// cross check incoming continuation class (flag of previous affix) against list.
-				dictionary.flagLookup.get(append, scratch);
-				char[] appendFlags = Dictionary.decodeFlags(scratch);
-				Debug.Assert(prevFlag >= 0);
-				compatible = hasCrossCheckedFlag((char)prevFlag, appendFlags, previousWasPrefix);
-			  }
-			  else
-			  {
-				compatible = false;
-			  }
-
-			  if (compatible)
-			  {
-				int appendLength = length - i;
-				int deAffixedLength = length - appendLength;
-
-				int stripStart = dictionary.stripOffsets[stripOrd];
-				int stripEnd = dictionary.stripOffsets[stripOrd + 1];
-				int stripLength = stripEnd - stripStart;
-
-				if (!checkCondition(condition, word, 0, deAffixedLength, dictionary.stripData, stripStart, stripLength))
-				{
-				  continue;
-				}
-
-				char[] strippedWord = new char[stripLength + deAffixedLength];
-				Array.Copy(word, 0, strippedWord, 0, deAffixedLength);
-				Array.Copy(dictionary.stripData, stripStart, strippedWord, deAffixedLength, stripLength);
-
-				IList<CharsRef> stemList = applyAffix(strippedWord, strippedWord.Length, suffix, prefixFlag, recursionDepth, false, circumfix);
-
-				stems.AddRange(stemList);
-			  }
-			}
-		  }
-		}
-
-		return stems;
-	  }
-
-	  /// <summary>
-	  /// checks condition of the concatenation of two strings </summary>
-	  // note: this is pretty stupid, we really should subtract strip from the condition up front and just check the stem
-	  // but this is a little bit more complicated.
-	  private bool checkCondition(int condition, char[] c1, int c1off, int c1len, char[] c2, int c2off, int c2len)
-	  {
-		if (condition != 0)
-		{
-		  CharacterRunAutomaton pattern = dictionary.patterns[condition];
-		  int state = pattern.InitialState;
-		  for (int i = c1off; i < c1off + c1len; i++)
-		  {
-			state = pattern.step(state, c1[i]);
-			if (state == -1)
-			{
-			  return false;
-			}
-		  }
-		  for (int i = c2off; i < c2off + c2len; i++)
-		  {
-			state = pattern.step(state, c2[i]);
-			if (state == -1)
-			{
-			  return false;
-			}
-		  }
-		  return pattern.isAccept(state);
-		}
-		return true;
-	  }
-
-	  /// <summary>
-	  /// Applies the affix rule to the given word, producing a list of stems if any are found
-	  /// </summary>
-	  /// <param name="strippedWord"> Word the affix has been removed and the strip added </param>
-	  /// <param name="length"> valid length of stripped word </param>
-	  /// <param name="affix"> HunspellAffix representing the affix rule itself </param>
-	  /// <param name="prefixFlag"> when we already stripped a prefix, we cant simply recurse and check the suffix, unless both are compatible
-	  ///                   so we must check dictionary form against both to add it as a stem! </param>
-	  /// <param name="recursionDepth"> current recursion depth </param>
-	  /// <param name="prefix"> true if we are removing a prefix (false if its a suffix) </param>
-	  /// <returns> List of stems for the word, or an empty list if none are found </returns>
-	  internal IList<CharsRef> applyAffix(char[] strippedWord, int length, int affix, int prefixFlag, int recursionDepth, bool prefix, bool circumfix)
-	  {
-		// TODO: just pass this in from before, no need to decode it twice
-		affixReader.Position = 8 * affix;
-		char flag = (char)(affixReader.readShort() & 0xffff);
-		affixReader.skipBytes(2); // strip
-		int condition = (char)(affixReader.readShort() & 0xffff);
-		bool crossProduct = (condition & 1) == 1;
-		condition = (int)((uint)condition >> 1);
-		char append = (char)(affixReader.readShort() & 0xffff);
-
-		IList<CharsRef> stems = new List<CharsRef>();
-
-		IntsRef forms = dictionary.lookupWord(strippedWord, 0, length);
-		if (forms != null)
-		{
-		  for (int i = 0; i < forms.length; i++)
-		  {
-			dictionary.flagLookup.get(forms.ints[forms.offset + i], scratch);
-			char[] wordFlags = Dictionary.decodeFlags(scratch);
-			if (Dictionary.hasFlag(wordFlags, flag))
-			{
-			  // confusing: in this one exception, we already chained the first prefix against the second,
-			  // so it doesnt need to be checked against the word
-			  bool chainedPrefix = dictionary.complexPrefixes && recursionDepth == 1 && prefix;
-			  if (chainedPrefix == false && prefixFlag >= 0 && !Dictionary.hasFlag(wordFlags, (char)prefixFlag))
-			  {
-				// see if we can chain prefix thru the suffix continuation class (only if it has any!)
-				dictionary.flagLookup.get(append, scratch);
-				char[] appendFlags = Dictionary.decodeFlags(scratch);
-				if (!hasCrossCheckedFlag((char)prefixFlag, appendFlags, false))
-				{
-				  continue;
-				}
-			  }
-
-			  // if circumfix was previously set by a prefix, we must check this suffix,
-			  // to ensure it has it, and vice versa
-			  if (dictionary.circumfix != -1)
-			  {
-				dictionary.flagLookup.get(append, scratch);
-				char[] appendFlags = Dictionary.decodeFlags(scratch);
-				bool suffixCircumfix = Dictionary.hasFlag(appendFlags, (char)dictionary.circumfix);
-				if (circumfix != suffixCircumfix)
-				{
-				  continue;
-				}
-			  }
-			  stems.Add(newStem(strippedWord, length));
-			}
-		  }
-		}
-
-		// if a circumfix flag is defined in the dictionary, and we are a prefix, we need to check if we have that flag
-		if (dictionary.circumfix != -1 && !circumfix && prefix)
-		{
-		  dictionary.flagLookup.get(append, scratch);
-		  char[] appendFlags = Dictionary.decodeFlags(scratch);
-		  circumfix = Dictionary.hasFlag(appendFlags, (char)dictionary.circumfix);
-		}
-
-		if (crossProduct)
-		{
-		  if (recursionDepth == 0)
-		  {
-			if (prefix)
-			{
-			  // we took away the first prefix.
-			  // COMPLEXPREFIXES = true:  combine with a second prefix and another suffix 
-			  // COMPLEXPREFIXES = false: combine with a suffix
-			  stems.AddRange(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, dictionary.complexPrefixes && dictionary.twoStageAffix, true, true, circumfix));
-			}
-			else if (dictionary.complexPrefixes == false && dictionary.twoStageAffix)
-			{
-			  // we took away a suffix.
-			  // COMPLEXPREFIXES = true: we don't recurse! only one suffix allowed
-			  // COMPLEXPREFIXES = false: combine with another suffix
-			  stems.AddRange(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix));
-			}
-		  }
-		  else if (recursionDepth == 1)
-		  {
-			if (prefix && dictionary.complexPrefixes)
-			{
-			  // we took away the second prefix: go look for another suffix
-			  stems.AddRange(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, false, true, true, circumfix));
-			}
-			else if (prefix == false && dictionary.complexPrefixes == false && dictionary.twoStageAffix)
-			{
-			  // we took away a prefix, then a suffix: go look for another suffix
-			  stems.AddRange(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix));
-			}
-		  }
-		}
-
-		return stems;
-	  }
-
-	  /// <summary>
-	  /// Checks if the given flag cross checks with the given array of flags
-	  /// </summary>
-	  /// <param name="flag"> Flag to cross check with the array of flags </param>
-	  /// <param name="flags"> Array of flags to cross check against.  Can be {@code null} </param>
-	  /// <returns> {@code true} if the flag is found in the array or the array is {@code null}, {@code false} otherwise </returns>
-	  private bool hasCrossCheckedFlag(char flag, char[] flags, bool matchEmpty)
-	  {
-		return (flags.Length == 0 && matchEmpty) || Arrays.binarySearch(flags, flag) >= 0;
-	  }
-	}
-
+    /// <summary>
+    /// Stemmer uses the affix rules declared in the Dictionary to generate one or more stems for a word.  It
+    /// conforms to the algorithm in the original hunspell algorithm, including recursive suffix stripping.
+    /// </summary>
+    internal sealed class Stemmer
+    {
+        private readonly Dictionary dictionary;
+        private readonly BytesRef scratch = new BytesRef();
+        private readonly StringBuilder segment = new StringBuilder();
+        private readonly ByteArrayDataInput affixReader;
+
+        // used for normalization
+        private readonly StringBuilder scratchSegment = new StringBuilder();
+        private char[] scratchBuffer = new char[32];
+
+        /// <summary>
+        /// Constructs a new Stemmer which will use the provided Dictionary to create its stems.
+        /// </summary>
+        /// <param name="dictionary"> Dictionary that will be used to create the stems </param>
+        public Stemmer(Dictionary dictionary)
+        {
+            this.dictionary = dictionary;
+            this.affixReader = new ByteArrayDataInput(dictionary.affixData);
+        }
+
+        /// <summary>
+        /// Find the stem(s) of the provided word.
+        /// </summary>
+        /// <param name="word"> Word to find the stems for </param>
+        /// <returns> List of stems for the word </returns>
+        public IList<CharsRef> Stem(string word)
+        {
+            return Stem(word.ToCharArray(), word.Length);
+        }
+
+        /// <summary>
+        /// Find the stem(s) of the provided word
+        /// </summary>
+        /// <param name="word"> Word to find the stems for </param>
+        /// <returns> List of stems for the word </returns>
+        public IList<CharsRef> Stem(char[] word, int length)
+        {
+
+            if (dictionary.needsInputCleaning)
+            {
+                scratchSegment.Length = 0;
+                scratchSegment.Append(word, 0, length);
+                string cleaned = dictionary.CleanInput(scratchSegment.ToString(), segment);
+                scratchBuffer = ArrayUtil.Grow(scratchBuffer, cleaned.Length);
+                length = segment.Length;
+                segment.CopyTo(0, scratchBuffer, 0, length);
+                word = scratchBuffer;
+            }
+
+            List<CharsRef> stems = new List<CharsRef>();
+            IntsRef forms = dictionary.LookupWord(word, 0, length);
+            if (forms != null)
+            {
+                // TODO: some forms should not be added, e.g. ONLYINCOMPOUND
+                // just because it exists, does not make it valid...
+                for (int i = 0; i < forms.Length; i++)
+                {
+                    stems.Add(NewStem(word, length));
+                }
+            }
+            stems.AddRange(Stem(word, length, -1, -1, -1, 0, true, true, false, false));
+            return stems;
+        }
+
+        /// <summary>
+        /// Find the unique stem(s) of the provided word
+        /// </summary>
+        /// <param name="word"> Word to find the stems for </param>
+        /// <returns> List of stems for the word </returns>
+        public IList<CharsRef> UniqueStems(char[] word, int length)
+        {
+            IList<CharsRef> stems = Stem(word, length);
+            if (stems.Count < 2)
+            {
+                return stems;
+            }
+            CharArraySet terms = new CharArraySet(LuceneVersion.LUCENE_CURRENT, 8, dictionary.ignoreCase);
+            IList<CharsRef> deduped = new List<CharsRef>();
+            foreach (CharsRef s in stems)
+            {
+                if (!terms.Contains(s))
+                {
+                    deduped.Add(s);
+                    terms.Add(s);
+                }
+            }
+            return deduped;
+        }
+
+        private CharsRef NewStem(char[] buffer, int length)
+        {
+            if (dictionary.needsOutputCleaning)
+            {
+                scratchSegment.Length = 0;
+                scratchSegment.Append(buffer, 0, length);
+                try
+                {
+                    Dictionary.ApplyMappings(dictionary.oconv, scratchSegment);
+                }
+                catch (IOException bogus)
+                {
+                    throw new Exception(bogus.Message, bogus);
+                }
+                char[] cleaned = new char[scratchSegment.Length];
+                scratchSegment.CopyTo(0, cleaned, 0, cleaned.Length);
+                return new CharsRef(cleaned, 0, cleaned.Length);
+            }
+            else
+            {
+                return new CharsRef(buffer, 0, length);
+            }
+        }
+
+        // ================================================= Helper Methods ================================================
+
+        /// <summary>
+        /// Generates a list of stems for the provided word
+        /// </summary>
+        /// <param name="word"> Word to generate the stems for </param>
+        /// <param name="previous"> previous affix that was removed (so we dont remove same one twice) </param>
+        /// <param name="prevFlag"> Flag from a previous stemming step that need to be cross-checked with any affixes in this recursive step </param>
+        /// <param name="prefixFlag"> flag of the most inner removed prefix, so that when removing a suffix, its also checked against the word </param>
+        /// <param name="recursionDepth"> current recursiondepth </param>
+        /// <param name="doPrefix"> true if we should remove prefixes </param>
+        /// <param name="doSuffix"> true if we should remove suffixes </param>
+        /// <param name="previousWasPrefix"> true if the previous removal was a prefix:
+        ///        if we are removing a suffix, and it has no continuation requirements, its ok.
+        ///        but two prefixes (COMPLEXPREFIXES) or two suffixes must have continuation requirements to recurse. </param>
+        /// <param name="circumfix"> true if the previous prefix removal was signed as a circumfix
+        ///        this means inner most suffix must also contain circumfix flag. </param>
+        /// <returns> List of stems, or empty list if no stems are found </returns>
+        private IList<CharsRef> Stem(char[] word, int length, int previous, int prevFlag, int prefixFlag, int recursionDepth, bool doPrefix, bool doSuffix, bool previousWasPrefix, bool circumfix)
+        {
+
+            // TODO: allow this stuff to be reused by tokenfilter
+            List<CharsRef> stems = new List<CharsRef>();
+
+            if (doPrefix && dictionary.prefixes != null)
+            {
+                for (int i = length - 1; i >= 0; i--)
+                {
+                    IntsRef prefixes = dictionary.LookupPrefix(word, 0, i);
+                    if (prefixes == null)
+                    {
+                        continue;
+                    }
+
+                    for (int j = 0; j < prefixes.Length; j++)
+                    {
+                        int prefix = prefixes.Ints[prefixes.Offset + j];
+                        if (prefix == previous)
+                        {
+                            continue;
+                        }
+                        affixReader.Position = 8 * prefix;
+                        char flag = (char)(affixReader.ReadShort() & 0xffff);
+                        char stripOrd = (char)(affixReader.ReadShort() & 0xffff);
+                        int condition = (char)(affixReader.ReadShort() & 0xffff);
+                        bool crossProduct = (condition & 1) == 1;
+                        condition = (int)((uint)condition >> 1);
+                        char append = (char)(affixReader.ReadShort() & 0xffff);
+
+                        bool compatible;
+                        if (recursionDepth == 0)
+                        {
+                            compatible = true;
+                        }
+                        else if (crossProduct)
+                        {
+                            // cross check incoming continuation class (flag of previous affix) against list.
+                            dictionary.flagLookup.Get(append, scratch);
+                            char[] appendFlags = Dictionary.DecodeFlags(scratch);
+                            Debug.Assert(prevFlag >= 0);
+                            compatible = HasCrossCheckedFlag((char)prevFlag, appendFlags, false);
+                        }
+                        else
+                        {
+                            compatible = false;
+                        }
+
+                        if (compatible)
+                        {
+                            int deAffixedStart = i;
+                            int deAffixedLength = length - deAffixedStart;
+
+                            int stripStart = dictionary.stripOffsets[stripOrd];
+                            int stripEnd = dictionary.stripOffsets[stripOrd + 1];
+                            int stripLength = stripEnd - stripStart;
+
+                            if (!CheckCondition(condition, dictionary.stripData, stripStart, stripLength, word, deAffixedStart, deAffixedLength))
+                            {
+                                continue;
+                            }
+
+                            char[] strippedWord = new char[stripLength + deAffixedLength];
+                            Array.Copy(dictionary.stripData, stripStart, strippedWord, 0, stripLength);
+                            Array.Copy(word, deAffixedStart, strippedWord, stripLength, deAffixedLength);
+
+                            IList<CharsRef> stemList = ApplyAffix(strippedWord, strippedWord.Length, prefix, -1, recursionDepth, true, circumfix);
+
+                            stems.AddRange(stemList);
+                        }
+                    }
+                }
+            }
+
+            if (doSuffix && dictionary.suffixes != null)
+            {
+                for (int i = 0; i < length; i++)
+                {
+                    IntsRef suffixes = dictionary.LookupSuffix(word, i, length - i);
+                    if (suffixes == null)
+                    {
+                        continue;
+                    }
+
+                    for (int j = 0; j < suffixes.Length; j++)
+                    {
+                        int suffix = suffixes.Ints[suffixes.Offset + j];
+                        if (suffix == previous)
+                        {
+                            continue;
+                        }
+                        affixReader.Position = 8 * suffix;
+                        char flag = (char)(affixReader.ReadShort() & 0xffff);
+                        char stripOrd = (char)(affixReader.ReadShort() & 0xffff);
+                        int condition = (char)(affixReader.ReadShort() & 0xffff);
+                        bool crossProduct = (condition & 1) == 1;
+                        condition = (int)((uint)condition >> 1);
+                        char append = (char)(affixReader.ReadShort() & 0xffff);
+
+                        bool compatible;
+                        if (recursionDepth == 0)
+                        {
+                            compatible = true;
+                        }
+                        else if (crossProduct)
+                        {
+                            // cross check incoming continuation class (flag of previous affix) against list.
+                            dictionary.flagLookup.Get(append, scratch);
+                            char[] appendFlags = Dictionary.DecodeFlags(scratch);
+                            Debug.Assert(prevFlag >= 0);
+                            compatible = HasCrossCheckedFlag((char)prevFlag, appendFlags, previousWasPrefix);
+                        }
+                        else
+                        {
+                            compatible = false;
+                        }
+
+                        if (compatible)
+                        {
+                            int appendLength = length - i;
+                            int deAffixedLength = length - appendLength;
+
+                            int stripStart = dictionary.stripOffsets[stripOrd];
+                            int stripEnd = dictionary.stripOffsets[stripOrd + 1];
+                            int stripLength = stripEnd - stripStart;
+
+                            if (!CheckCondition(condition, word, 0, deAffixedLength, dictionary.stripData, stripStart, stripLength))
+                            {
+                                continue;
+                            }
+
+                            char[] strippedWord = new char[stripLength + deAffixedLength];
+                            Array.Copy(word, 0, strippedWord, 0, deAffixedLength);
+                            Array.Copy(dictionary.stripData, stripStart, strippedWord, deAffixedLength, stripLength);
+
+                            IList<CharsRef> stemList = ApplyAffix(strippedWord, strippedWord.Length, suffix, prefixFlag, recursionDepth, false, circumfix);
+
+                            stems.AddRange(stemList);
+                        }
+                    }
+                }
+            }
+
+            return stems;
+        }
+
+        /// <summary>
+        /// checks condition of the concatenation of two strings </summary>
+        // note: this is pretty stupid, we really should subtract strip from the condition up front and just check the stem
+        // but this is a little bit more complicated.
+        private bool CheckCondition(int condition, char[] c1, int c1off, int c1len, char[] c2, int c2off, int c2len)
+        {
+            if (condition != 0)
+            {
+                CharacterRunAutomaton pattern = dictionary.patterns[condition];
+                int state = pattern.InitialState;
+                for (int i = c1off; i < c1off + c1len; i++)
+                {
+                    state = pattern.Step(state, c1[i]);
+                    if (state == -1)
+                    {
+                        return false;
+                    }
+                }
+                for (int i = c2off; i < c2off + c2len; i++)
+                {
+                    state = pattern.Step(state, c2[i]);
+                    if (state == -1)
+                    {
+                        return false;
+                    }
+                }
+                return pattern.IsAccept(state);
+            }
+            return true;
+        }
+
+        /// <summary>
+        /// Applies the affix rule to the given word, producing a list of stems if any are found
+        /// </summary>
+        /// <param name="strippedWord"> Word the affix has been removed and the strip added </param>
+        /// <param name="length"> valid length of stripped word </param>
+        /// <param name="affix"> HunspellAffix representing the affix rule itself </param>
+        /// <param name="prefixFlag"> when we already stripped a prefix, we can't simply recurse and check the suffix, unless both are compatible
+        ///                   so we must check dictionary form against both to add it as a stem! </param>
+        /// <param name="recursionDepth"> current recursion depth </param>
+        /// <param name="prefix"> true if we are removing a prefix (false if it's a suffix) </param>
+        /// <param name="circumfix"> true if a previously stripped affix carried the circumfix flag; used below to require that
+        ///                   prefix and suffix agree on the circumfix flag </param>
+        /// <returns> List of stems for the word, or an empty list if none are found </returns>
+        internal IList<CharsRef> ApplyAffix(char[] strippedWord, int length, int affix, int prefixFlag, int recursionDepth, bool prefix, bool circumfix)
+        {
+            // Decode the fixed-width (8-byte) affix entry at index 'affix':
+            // flag, strip (skipped here), condition + cross-product bit, append flag.
+            // TODO: just pass this in from before, no need to decode it twice
+            affixReader.Position = 8 * affix;
+            char flag = (char)(affixReader.ReadShort() & 0xffff);
+            affixReader.SkipBytes(2); // strip
+            int condition = (char)(affixReader.ReadShort() & 0xffff);
+            // Lowest bit of the stored condition word marks whether this affix allows
+            // cross-product (prefix+suffix combination); the remaining bits are the
+            // index of the condition pattern.
+            bool crossProduct = (condition & 1) == 1;
+            condition = (int)((uint)condition >> 1);
+            char append = (char)(affixReader.ReadShort() & 0xffff);
+
+            List<CharsRef> stems = new List<CharsRef>();
+
+            // Look the stripped word up in the dictionary; null means it is not a known form.
+            IntsRef forms = dictionary.LookupWord(strippedWord, 0, length);
+            if (forms != null)
+            {
+                // A word may have several homonym entries; check the affix flag against each.
+                for (int i = 0; i < forms.Length; i++)
+                {
+                    dictionary.flagLookup.Get(forms.Ints[forms.Offset + i], scratch);
+                    char[] wordFlags = Dictionary.DecodeFlags(scratch);
+                    if (Dictionary.HasFlag(wordFlags, flag))
+                    {
+                        // confusing: in this one exception, we already chained the first prefix against the second,
+                        // so it doesn't need to be checked against the word
+                        bool chainedPrefix = dictionary.complexPrefixes && recursionDepth == 1 && prefix;
+                        if (chainedPrefix == false && prefixFlag >= 0 && !Dictionary.HasFlag(wordFlags, (char)prefixFlag))
+                        {
+                            // see if we can chain prefix thru the suffix continuation class (only if it has any!)
+                            dictionary.flagLookup.Get(append, scratch);
+                            char[] appendFlags = Dictionary.DecodeFlags(scratch);
+                            if (!HasCrossCheckedFlag((char)prefixFlag, appendFlags, false))
+                            {
+                                continue;
+                            }
+                        }
+
+                        // if circumfix was previously set by a prefix, we must check this suffix,
+                        // to ensure it has it, and vice versa
+                        if (dictionary.circumfix != -1)
+                        {
+                            dictionary.flagLookup.Get(append, scratch);
+                            char[] appendFlags = Dictionary.DecodeFlags(scratch);
+                            bool suffixCircumfix = Dictionary.HasFlag(appendFlags, (char)dictionary.circumfix);
+                            if (circumfix != suffixCircumfix)
+                            {
+                                continue;
+                            }
+                        }
+                        // All checks passed: the stripped word is a valid stem for this affix.
+                        stems.Add(NewStem(strippedWord, length));
+                    }
+                }
+            }
+
+            // if a circumfix flag is defined in the dictionary, and we are a prefix, we need to check if we have that flag
+            if (dictionary.circumfix != -1 && !circumfix && prefix)
+            {
+                dictionary.flagLookup.Get(append, scratch);
+                char[] appendFlags = Dictionary.DecodeFlags(scratch);
+                circumfix = Dictionary.HasFlag(appendFlags, (char)dictionary.circumfix);
+            }
+
+            // Cross-product affixes may combine with further affixes; recurse (bounded
+            // by recursionDepth) to try the allowed prefix/suffix combinations.
+            if (crossProduct)
+            {
+                if (recursionDepth == 0)
+                {
+                    if (prefix)
+                    {
+                        // we took away the first prefix.
+                        // COMPLEXPREFIXES = true:  combine with a second prefix and another suffix 
+                        // COMPLEXPREFIXES = false: combine with a suffix
+                        stems.AddRange(Stem(strippedWord, length, affix, flag, flag, ++recursionDepth, dictionary.complexPrefixes && dictionary.twoStageAffix, true, true, circumfix));
+                    }
+                    else if (dictionary.complexPrefixes == false && dictionary.twoStageAffix)
+                    {
+                        // we took away a suffix.
+                        // COMPLEXPREFIXES = true: we don't recurse! only one suffix allowed
+                        // COMPLEXPREFIXES = false: combine with another suffix
+                        stems.AddRange(Stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix));
+                    }
+                }
+                else if (recursionDepth == 1)
+                {
+                    if (prefix && dictionary.complexPrefixes)
+                    {
+                        // we took away the second prefix: go look for another suffix
+                        stems.AddRange(Stem(strippedWord, length, affix, flag, flag, ++recursionDepth, false, true, true, circumfix));
+                    }
+                    else if (prefix == false && dictionary.complexPrefixes == false && dictionary.twoStageAffix)
+                    {
+                        // we took away a prefix, then a suffix: go look for another suffix
+                        stems.AddRange(Stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix));
+                    }
+                }
+            }
+
+            return stems;
+        }
+
+        /// <summary>
+        /// Checks if the given flag cross checks with the given array of flags
+        /// </summary>
+        /// <param name="flag"> Flag to cross check with the array of flags </param>
+        /// <param name="flags"> Array of flags to cross check against. Must not be {@code null} (a null
+        ///                      argument would throw); may be empty </param>
+        /// <param name="matchEmpty"> result to return when {@code flags} is empty </param>
+        /// <returns> {@code true} if the flag is found in the array, or if the array is empty and
+        ///          {@code matchEmpty} is set; {@code false} otherwise </returns>
+        private bool HasCrossCheckedFlag(char flag, char[] flags, bool matchEmpty)
+        {
+            // NOTE(review): BinarySearch requires 'flags' to be sorted — assumed to be
+            // guaranteed by Dictionary.DecodeFlags; verify against that implementation.
+            return (flags.Length == 0 && matchEmpty) || Arrays.BinarySearch(flags, flag) >= 0;
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
index 1c6d7fc..a74ed0b 100644
--- a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
+++ b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
@@ -191,6 +191,11 @@
     <Compile Include="Analysis\Hi\HindiStemFilter.cs" />
     <Compile Include="Analysis\Hi\HindiStemFilterFactory.cs" />
     <Compile Include="Analysis\Hi\HindiStemmer.cs" />
+    <Compile Include="Analysis\Hunspell\Dictionary.cs" />
+    <Compile Include="Analysis\Hunspell\HunspellStemFilter.cs" />
+    <Compile Include="Analysis\Hunspell\HunspellStemFilterFactory.cs" />
+    <Compile Include="Analysis\Hunspell\ISO8859_14Decoder.cs" />
+    <Compile Include="Analysis\Hunspell\Stemmer.cs" />
     <Compile Include="Analysis\Hu\HungarianAnalyzer.cs" />
     <Compile Include="Analysis\Hu\HungarianLightStemFilter.cs" />
     <Compile Include="Analysis\Hu\HungarianLightStemFilterFactory.cs" />

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Core/Support/IDictionaryExtensions.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/IDictionaryExtensions.cs b/src/Lucene.Net.Core/Support/IDictionaryExtensions.cs
index 277dd3a..b27c855 100644
--- a/src/Lucene.Net.Core/Support/IDictionaryExtensions.cs
+++ b/src/Lucene.Net.Core/Support/IDictionaryExtensions.cs
@@ -27,7 +27,7 @@ namespace Lucene.Net.Support
             if (dict == null)
                 return default(TValue);
 
-            var oldValue = dict[key];
+            var oldValue = dict.ContainsKey(key) ? dict[key] : default(TValue);
             dict[key] = value;
             return oldValue;
         }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/StemmerTestBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/StemmerTestBase.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/StemmerTestBase.cs
index a1e0353..2c691a7 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/StemmerTestBase.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/StemmerTestBase.cs
@@ -1,9 +1,13 @@
-using System.Collections.Generic;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
 
-namespace org.apache.lucene.analysis.hunspell
+namespace Lucene.Net.Analysis.Hunspell
 {
-
-	/*
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -20,76 +24,68 @@ namespace org.apache.lucene.analysis.hunspell
 	 * limitations under the License.
 	 */
 
+    /// <summary>
+    /// base class for hunspell stemmer tests </summary>
+    public abstract class StemmerTestBase : LuceneTestCase
+    {
+        private static Stemmer stemmer;
 
-	using CharsRef = org.apache.lucene.util.CharsRef;
-	using IOUtils = org.apache.lucene.util.IOUtils;
-	using LuceneTestCase = org.apache.lucene.util.LuceneTestCase;
-
-	/// <summary>
-	/// base class for hunspell stemmer tests </summary>
-	internal abstract class StemmerTestBase : LuceneTestCase
-	{
-	  private static Stemmer stemmer;
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: static void init(String affix, String dictionary) throws java.io.IOException, java.text.ParseException
-	  internal static void init(string affix, string dictionary)
-	  {
-		init(false, affix, dictionary);
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: static void init(boolean ignoreCase, String affix, String... dictionaries) throws java.io.IOException, java.text.ParseException
-	  internal static void init(bool ignoreCase, string affix, params string[] dictionaries)
-	  {
-		if (dictionaries.Length == 0)
-		{
-		  throw new System.ArgumentException("there must be at least one dictionary");
-		}
+        internal static void Init(string affix, string dictionary)
+        {
+            Init(false, affix, dictionary);
+        }
 
-		System.IO.Stream affixStream = typeof(StemmerTestBase).getResourceAsStream(affix);
-		if (affixStream == null)
-		{
-		  throw new FileNotFoundException("file not found: " + affix);
-		}
+        internal static void Init(bool ignoreCase, string affix, params string[] dictionaries)
+        {
+            if (dictionaries.Length == 0)
+            {
+                throw new System.ArgumentException("there must be at least one dictionary");
+            }
 
-		System.IO.Stream[] dictStreams = new System.IO.Stream[dictionaries.Length];
-		for (int i = 0; i < dictionaries.Length; i++)
-		{
-		  dictStreams[i] = typeof(StemmerTestBase).getResourceAsStream(dictionaries[i]);
-		  if (dictStreams[i] == null)
-		  {
-			throw new FileNotFoundException("file not found: " + dictStreams[i]);
-		  }
-		}
+            System.IO.Stream affixStream = typeof(StemmerTestBase).getResourceAsStream(affix);
+            if (affixStream == null)
+            {
+                throw new FileNotFoundException("file not found: " + affix);
+            }
 
-		try
-		{
-		  Dictionary dictionary = new Dictionary(affixStream, Arrays.asList(dictStreams), ignoreCase);
-		  stemmer = new Stemmer(dictionary);
-		}
-		finally
-		{
-		  IOUtils.closeWhileHandlingException(affixStream);
-		  IOUtils.closeWhileHandlingException(dictStreams);
-		}
-	  }
+            System.IO.Stream[] dictStreams = new System.IO.Stream[dictionaries.Length];
+            for (int i = 0; i < dictionaries.Length; i++)
+            {
+                dictStreams[i] = typeof(StemmerTestBase).getResourceAsStream(dictionaries[i]);
+                if (dictStreams[i] == null)
+                {
+                    throw new FileNotFoundException("file not found: " + dictStreams[i]);
+                }
+            }
 
-	  internal static void assertStemsTo(string s, params string[] expected)
-	  {
-		assertNotNull(stemmer);
-		Arrays.sort(expected);
+            try
+            {
+                Dictionary dictionary = new Dictionary(affixStream, Arrays.AsList(dictStreams), ignoreCase);
+                stemmer = new Stemmer(dictionary);
+            }
+            finally
+            {
+                IOUtils.CloseWhileHandlingException(affixStream);
+                IOUtils.CloseWhileHandlingException(null, dictStreams);
+            }
+        }
 
-		IList<CharsRef> stems = stemmer.stem(s);
-		string[] actual = new string[stems.Count];
-		for (int i = 0; i < actual.Length; i++)
-		{
-		  actual[i] = stems[i].ToString();
-		}
-		Arrays.sort(actual);
+        internal static void AssertStemsTo(string s, params string[] expected)
+        {
+            assertNotNull(stemmer);
+            Array.Sort(expected);
 
-		assertArrayEquals("expected=" + Arrays.ToString(expected) + ",actual=" + Arrays.ToString(actual), expected, actual);
-	  }
-	}
+            IList<CharsRef> stems = stemmer.Stem(s);
+            string[] actual = new string[stems.Count];
+            for (int i = 0; i < actual.Length; i++)
+            {
+                actual[i] = stems[i].ToString();
+            }
+            Array.Sort(actual);
 
+            // LUCENENET: Originally, the code was as follows, but it failed to properly compare the arrays.
+            //assertArrayEquals("expected=" + Arrays.ToString(expected) + ",actual=" + Arrays.ToString(actual), expected, actual);
+            Assert.AreEqual(expected, actual, "expected=" + Arrays.ToString(expected) + ",actual=" + Arrays.ToString(actual));
+        }
+    }
 }
\ No newline at end of file


Mime
View raw message