lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [04/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common
Date Fri, 07 Nov 2014 23:12:08 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs
new file mode 100644
index 0000000..c0a52c6
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs
@@ -0,0 +1,151 @@
+using System;
+
+namespace org.apache.lucene.analysis.tr
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// Normalizes Turkish token text to lower case.
+	/// <para>
+	/// Turkish and Azeri have unique casing behavior for some characters. This
+	/// filter applies Turkish lowercase rules. For more information, see <a
+	/// href="http://en.wikipedia.org/wiki/Turkish_dotted_and_dotless_I"
+	/// >http://en.wikipedia.org/wiki/Turkish_dotted_and_dotless_I</a>
+	/// </para>
+	/// </summary>
+	public sealed class TurkishLowerCaseFilter : TokenFilter
+	{
+	  private const int LATIN_CAPITAL_LETTER_I = '\u0049';
+	  private const int LATIN_SMALL_LETTER_I = '\u0069';
+	  private const int LATIN_SMALL_LETTER_DOTLESS_I = '\u0131';
+	  private const int COMBINING_DOT_ABOVE = '\u0307';
+	  // Term text of the current token. Assigned in the constructor because a C#
+	  // field initializer may not call an instance method such as addAttribute.
+	  private readonly CharTermAttribute termAtt;
+
+	  /// <summary>
+	  /// Create a new TurkishLowerCaseFilter, that normalizes Turkish token text 
+	  /// to lower case.
+	  /// </summary>
+	  /// <param name="in"> TokenStream to filter </param>
+	  public TurkishLowerCaseFilter(TokenStream @in) : base(@in)
+	  {
+		termAtt = addAttribute(typeof(CharTermAttribute));
+	  }
+
+	  /// <summary>
+	  /// Advances the wrapped stream by one token and lower-cases its term text in place.
+	  /// A capital "I" becomes dotless "ı" unless a COMBINING DOT ABOVE follows it (possibly
+	  /// after other non-spacing marks); in that case it becomes "i" and the dot is deleted.
+	  /// </summary>
+	  /// <returns> true if the input produced a token, false if it did not </returns>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		// true while positioned on a capital I or on the non-spacing marks right after it
+		bool iOrAfter = false;
+
+		if (input.incrementToken())
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char[] buffer = termAtt.buffer();
+		  char[] buffer = termAtt.buffer();
+		  int length = termAtt.length();
+		  // No increment clause: each branch below advances i itself (or shrinks length).
+		  for (int i = 0; i < length;)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int ch = Character.codePointAt(buffer, i, length);
+			// NOTE(review): char.codePointAt, char.getType, char.NON_SPACING_MARK and
+			// char.toChars are Java Character members left by the converter; System.Char
+			// does not define them, so these calls still need a .NET port.
+			int ch = char.codePointAt(buffer, i, length);
+
+			iOrAfter = (ch == LATIN_CAPITAL_LETTER_I || (iOrAfter && char.getType(ch) == char.NON_SPACING_MARK));
+
+			if (iOrAfter) // all the special I turkish handling happens here.
+			{
+			  switch (ch)
+			  {
+				// remove COMBINING_DOT_ABOVE to mimic composed lowercase
+				case COMBINING_DOT_ABOVE:
+				  // delete shifts the tail left, so i already points at the next char
+				  length = delete(buffer, i, length);
+				  continue;
+				// i itself, it depends if it is followed by COMBINING_DOT_ABOVE
+				// if it is, we will make it small i and later remove the dot
+				case LATIN_CAPITAL_LETTER_I:
+				  if (isBeforeDot(buffer, i + 1, length))
+				  {
+					buffer[i] = (char)LATIN_SMALL_LETTER_I;
+				  }
+				  else
+				  {
+					buffer[i] = (char)LATIN_SMALL_LETTER_DOTLESS_I;
+					// below is an optimization. no COMBINING_DOT_ABOVE follows,
+					// so don't waste time calculating Character.getType(), etc
+					iOrAfter = false;
+				  }
+				  i++;
+				  continue;
+			  }
+			}
+
+			// Every other character: write its lower-case form back and advance past it.
+			i += char.toChars(char.ToLower(ch), buffer, i);
+		  }
+
+		  // length may have shrunk if combining dots were deleted
+		  termAtt.Length = length;
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+
+
+	  /// <summary>
+	  /// Looks ahead from <paramref name="pos"/> for a COMBINING DOT ABOVE (U+0307).
+	  /// Other non-spacing marks may sit in between; any other kind of character
+	  /// ends the search.
+	  /// </summary>
+	  /// <param name="s"> term buffer </param>
+	  /// <param name="pos"> index of the first char to inspect </param>
+	  /// <param name="len"> number of valid chars in <paramref name="s"/> </param>
+	  /// <returns> true if a combining dot above is found before any character that is not a non-spacing mark </returns>
+	  private bool isBeforeDot(char[] s, int pos, int len)
+	  {
+		for (int i = pos; i < len;)
+		{
+		  // A well-formed surrogate pair is one character occupying two chars.
+		  bool isPair = char.IsHighSurrogate(s[i]) && i + 1 < len && char.IsLowSurrogate(s[i + 1]);
+		  System.Globalization.UnicodeCategory category = isPair
+			  ? System.Globalization.CharUnicodeInfo.GetUnicodeCategory(new string(s, i, 2), 0)
+			  : System.Globalization.CharUnicodeInfo.GetUnicodeCategory(s[i]);
+		  if (category != System.Globalization.UnicodeCategory.NonSpacingMark)
+		  {
+			return false;
+		  }
+		  // COMBINING_DOT_ABOVE is in the BMP, so it can only match a single char.
+		  if (!isPair && s[i] == COMBINING_DOT_ABOVE)
+		  {
+			return true;
+		  }
+		  i += isPair ? 2 : 1;
+		}
+
+		return false;
+	  }
+
+	  /// <summary>
+	  /// Removes the char at <paramref name="pos"/> by shifting the following chars one
+	  /// slot to the left, in place. Only needed when a COMBINING_DOT_ABOVE follows an i.
+	  /// </summary>
+	  /// <param name="s"> buffer to edit </param>
+	  /// <param name="pos"> index of the char to remove </param>
+	  /// <param name="len"> number of valid chars in <paramref name="s"/> </param>
+	  /// <returns> the new valid length, always <paramref name="len"/> - 1 </returns>
+	  private int delete(char[] s, int pos, int len)
+	  {
+		int shortened = len - 1;
+		if (pos >= len)
+		{
+		  // nothing to shift; the length is still reduced, as before
+		  return shortened;
+		}
+
+		int tail = shortened - pos;
+		Array.Copy(s, pos + 1, s, pos, tail);
+		return shortened;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilterFactory.cs
new file mode 100644
index 0000000..7edf5e2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilterFactory.cs
@@ -0,0 +1,64 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.tr
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using AbstractAnalysisFactory = org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+	using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent;
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="TurkishLowerCaseFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_trlwr" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.TurkishLowerCaseFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class TurkishLowerCaseFilterFactory : TokenFilterFactory, MultiTermAwareComponent
+	{
+
+	  /// <summary>
+	  /// Creates a new TurkishLowerCaseFilterFactory.
+	  /// </summary>
+	  /// <param name="args"> configuration map; it is handed to the base constructor first and must be empty afterwards </param>
+	  /// <exception cref="System.ArgumentException"> if any entry is left in <paramref name="args"/> </exception>
+	  public TurkishLowerCaseFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  // Concatenating the dictionary itself would only print its type name,
+		  // so the leftover entries are listed explicitly.
+		  List<string> leftovers = new List<string>(args.Count);
+		  foreach (KeyValuePair<string, string> entry in args)
+		  {
+			leftovers.Add(entry.Key + "=" + entry.Value);
+		  }
+		  throw new System.ArgumentException("Unknown parameters: {" + string.Join(", ", leftovers) + "}");
+		}
+	  }
+
+	  /// <summary>
+	  /// Wraps <paramref name="input"/> in a new <see cref="TurkishLowerCaseFilter"/>.
+	  /// </summary>
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new TurkishLowerCaseFilter(input);
+	  }
+
+	  /// <summary>
+	  /// The component to use for multi-term analysis: this factory itself.
+	  /// </summary>
+	  public virtual AbstractAnalysisFactory MultiTermComponent
+	  {
+		  get
+		  {
+			return this;
+		  }
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
new file mode 100644
index 0000000..8cf5e28
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
@@ -0,0 +1,406 @@
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Core;
+using org.apache.lucene.analysis.util;
+
+namespace Lucene.Net.Analysis.Util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+    /// <summary>
+	/// Abstract parent class for analysis factories <seealso cref="TokenizerFactory"/>,
+	/// <seealso cref="TokenFilterFactory"/> and <seealso cref="CharFilterFactory"/>.
+	/// <para>
+	/// The typical lifecycle for a factory consumer is:
+	/// <list type="number">
+	///   <item><description>Create factory via its constructor (or via XXXFactory.forName)</description></item>
+	///   <item><description>(Optional) If the factory uses resources such as files, <seealso cref="ResourceLoaderAware#inform(ResourceLoader)"/> is called to initialize those resources.</description></item>
+	///   <item><description>Consumer calls create() to obtain instances.</description></item>
+	/// </list>
+	/// </para>
+	/// </summary>
+	public abstract class AbstractAnalysisFactory
+	{
+	  public const string LUCENE_MATCH_VERSION_PARAM = "luceneMatchVersion";
+
+	  /// <summary>
+	  /// The original args, before any processing </summary>
+	  private readonly IDictionary<string, string> originalArgs;
+
+	  /// <summary>
+	  /// the luceneVersion arg </summary>
+	  protected internal readonly Lucene.Net.Util.Version luceneMatchVersion;
+	  /// <summary>
+	  /// whether the luceneMatchVersion arg is explicitly specified in the serialized schema </summary>
+	  private bool isExplicitLuceneMatchVersion = false;
+
+	  /// <summary>
+	  /// Initialize this factory via a set of key-value pairs.
+	  /// <para>
+	  /// A copy of <paramref name="args"/> is stored as the original args; the
+	  /// <c>luceneMatchVersion</c> and <c>class</c> entries are then removed from
+	  /// <paramref name="args"/> itself.
+	  /// </para>
+	  /// </summary>
+	  /// <param name="args"> mutable configuration map; recognized entries are removed from it </param>
+	  protected internal AbstractAnalysisFactory(IDictionary<string, string> args)
+	  {
+		// "new Dictionary<>(...)" is Java diamond syntax; C# needs the type arguments spelled out.
+		originalArgs = Collections.UnmodifiableMap(new Dictionary<string, string>(args));
+		string version = get(args, LUCENE_MATCH_VERSION_PARAM);
+		// Qualified to match the field's declared type: with "using System;" in scope a bare
+		// Version would presumably bind to System.Version, which has no ParseLeniently.
+		luceneMatchVersion = version == null ? null : Lucene.Net.Util.Version.ParseLeniently(version);
+		args.Remove(CLASS_NAME); // consume the class arg
+	  }
+
+	  /// <summary>
+	  /// The args this factory was constructed with, as captured before any of them were consumed.
+	  /// </summary>
+	  public IDictionary<string, string> OriginalArgs
+	  {
+		  get
+		  {
+			return originalArgs;
+		  }
+	  }
+
+	   /// <summary>
+	   /// Guard for factories that cannot work without a match version: call it from a
+	   /// create method to fail with a configuration error when no
+	   /// <c>luceneMatchVersion</c> parameter was supplied.
+	   /// </summary>
+	   /// <exception cref="System.ArgumentException"> if luceneMatchVersion is null </exception>
+	  protected internal void assureMatchVersion()
+	  {
+		if (luceneMatchVersion != null)
+		{
+		  return;
+		}
+
+		// Type.FullName is used where the Java original used Class.getName.
+		string factoryName = this.GetType().FullName;
+		throw new System.ArgumentException("Configuration Error: Factory '" + factoryName + "' needs a 'luceneMatchVersion' parameter");
+	  }
+
+	  /// <summary>
+	  /// The match version parsed from the <c>luceneMatchVersion</c> arg, or null if none was given.
+	  /// </summary>
+	  // Return type is qualified to match the backing field; with "using System;" in scope
+	  // a bare Version would presumably resolve to System.Version.
+	  public Lucene.Net.Util.Version LuceneMatchVersion
+	  {
+		  get
+		  {
+			return this.luceneMatchVersion;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Removes <paramref name="name"/> from <paramref name="args"/> and returns its value.
+	  /// </summary>
+	  /// <exception cref="System.ArgumentException"> if the parameter is missing or null </exception>
+	  public virtual string require(IDictionary<string, string> args, string name)
+	  {
+		// IDictionary.Remove returns a bool, not the removed value, so look it up first.
+		string s;
+		if (args.TryGetValue(name, out s))
+		{
+		  args.Remove(name);
+		}
+		if (s == null)
+		{
+		  throw new System.ArgumentException("Configuration Error: missing parameter '" + name + "'");
+		}
+		return s;
+	  }
+	  /// <summary>
+	  /// Same as the two-argument overload, but the value must also be one of
+	  /// <paramref name="allowedValues"/> (case-sensitive).
+	  /// </summary>
+	  public virtual string require(IDictionary<string, string> args, string name, ICollection<string> allowedValues)
+	  {
+		return require(args, name, allowedValues, true);
+	  }
+	  /// <summary>
+	  /// Removes <paramref name="name"/> from <paramref name="args"/> and returns its value,
+	  /// which must be one of <paramref name="allowedValues"/>.
+	  /// </summary>
+	  /// <param name="caseSensitive"> whether the comparison against the allowed values is case-sensitive </param>
+	  /// <exception cref="System.ArgumentException"> if the parameter is missing or its value is not allowed </exception>
+	  public virtual string require(IDictionary<string, string> args, string name, ICollection<string> allowedValues, bool caseSensitive)
+	  {
+		string s;
+		if (args.TryGetValue(name, out s))
+		{
+		  args.Remove(name);
+		}
+		if (s == null)
+		{
+		  throw new System.ArgumentException("Configuration Error: missing parameter '" + name + "'");
+		}
+
+		// Configuration values are identifiers, not user text: compare ordinally so the
+		// result does not depend on the current culture (e.g. Turkish i/I casing).
+		StringComparison comparison = caseSensitive ? StringComparison.Ordinal : StringComparison.OrdinalIgnoreCase;
+		foreach (string allowedValue in allowedValues)
+		{
+		  if (s.Equals(allowedValue, comparison))
+		  {
+			return s;
+		  }
+		}
+		// Join the values; concatenating the collection would print only its type name.
+		throw new System.ArgumentException("Configuration Error: '" + name + "' value must be one of [" + string.Join(", ", allowedValues) + "]");
+	  }
+	  /// <summary>
+	  /// Removes <paramref name="name"/> from <paramref name="args"/> and returns its value,
+	  /// or null if it is not present.
+	  /// </summary>
+	  public virtual string get(IDictionary<string, string> args, string name)
+	  {
+		// IDictionary.Remove returns a bool, not the removed value, so look it up first.
+		string s;
+		if (args.TryGetValue(name, out s))
+		{
+		  args.Remove(name);
+		}
+		return s; // defaultVal = null
+	  }
+	  /// <summary>
+	  /// Removes <paramref name="name"/> from <paramref name="args"/> and returns its value,
+	  /// or <paramref name="defaultVal"/> if it is not present.
+	  /// </summary>
+	  public virtual string get(IDictionary<string, string> args, string name, string defaultVal)
+	  {
+		string s;
+		if (args.TryGetValue(name, out s))
+		{
+		  args.Remove(name);
+		}
+		return s == null ? defaultVal : s;
+	  }
+	  /// <summary>
+	  /// Like the five-argument overload with a null default and case-sensitive matching.
+	  /// </summary>
+	  public virtual string get(IDictionary<string, string> args, string name, ICollection<string> allowedValues)
+	  {
+		return get(args, name, allowedValues, null); // defaultVal = null
+	  }
+	  /// <summary>
+	  /// Like the five-argument overload with case-sensitive matching.
+	  /// </summary>
+	  public virtual string get(IDictionary<string, string> args, string name, ICollection<string> allowedValues, string defaultVal)
+	  {
+		return get(args, name, allowedValues, defaultVal, true);
+	  }
+	  /// <summary>
+	  /// Removes <paramref name="name"/> from <paramref name="args"/> and returns its value,
+	  /// which must be one of <paramref name="allowedValues"/>; returns
+	  /// <paramref name="defaultVal"/> (unchecked) if the parameter is not present.
+	  /// </summary>
+	  /// <param name="caseSensitive"> whether the comparison against the allowed values is case-sensitive </param>
+	  /// <exception cref="System.ArgumentException"> if a value is present but not allowed </exception>
+	  public virtual string get(IDictionary<string, string> args, string name, ICollection<string> allowedValues, string defaultVal, bool caseSensitive)
+	  {
+		string s;
+		if (args.TryGetValue(name, out s))
+		{
+		  args.Remove(name);
+		}
+		if (s == null)
+		{
+		  return defaultVal;
+		}
+
+		// Configuration values are identifiers, not user text: compare ordinally so the
+		// result does not depend on the current culture (e.g. Turkish i/I casing).
+		StringComparison comparison = caseSensitive ? StringComparison.Ordinal : StringComparison.OrdinalIgnoreCase;
+		foreach (string allowedValue in allowedValues)
+		{
+		  if (s.Equals(allowedValue, comparison))
+		  {
+			return s;
+		  }
+		}
+		// Join the values; concatenating the collection would print only its type name.
+		throw new System.ArgumentException("Configuration Error: '" + name + "' value must be one of [" + string.Join(", ", allowedValues) + "]");
+	  }
+
+	  /// <summary>
+	  /// Removes the required parameter <paramref name="name"/> and parses it as an int
+	  /// using the invariant culture.
+	  /// </summary>
+	  protected internal int requireInt(IDictionary<string, string> args, string name)
+	  {
+		return int.Parse(require(args, name), System.Globalization.CultureInfo.InvariantCulture);
+	  }
+	  /// <summary>
+	  /// Removes the optional parameter <paramref name="name"/> and parses it as an int
+	  /// using the invariant culture; returns <paramref name="defaultVal"/> if it is absent.
+	  /// </summary>
+	  protected internal int getInt(IDictionary<string, string> args, string name, int defaultVal)
+	  {
+		// IDictionary.Remove returns a bool, not the removed value, so look it up first.
+		string s;
+		if (args.TryGetValue(name, out s))
+		{
+		  args.Remove(name);
+		}
+		return s == null ? defaultVal : int.Parse(s, System.Globalization.CultureInfo.InvariantCulture);
+	  }
+
+	  /// <summary>
+	  /// Removes the required parameter <paramref name="name"/> and parses it with <c>bool.Parse</c>.
+	  /// </summary>
+	  protected internal bool requireBoolean(IDictionary<string, string> args, string name)
+	  {
+		return bool.Parse(require(args, name));
+	  }
+	  /// <summary>
+	  /// Removes the optional parameter <paramref name="name"/> and parses it with
+	  /// <c>bool.Parse</c>; returns <paramref name="defaultVal"/> if it is absent.
+	  /// </summary>
+	  protected internal bool getBoolean(IDictionary<string, string> args, string name, bool defaultVal)
+	  {
+		// IDictionary.Remove returns a bool, not the removed value, so look it up first.
+		string s;
+		if (args.TryGetValue(name, out s))
+		{
+		  args.Remove(name);
+		}
+		return s == null ? defaultVal : bool.Parse(s);
+	  }
+
+	  /// <summary>
+	  /// Removes the required parameter <paramref name="name"/> and parses it as a float
+	  /// using the invariant culture, so "1.5" parses the same way under every locale.
+	  /// </summary>
+	  protected internal float requireFloat(IDictionary<string, string> args, string name)
+	  {
+		return float.Parse(require(args, name), System.Globalization.CultureInfo.InvariantCulture);
+	  }
+	  /// <summary>
+	  /// Removes the optional parameter <paramref name="name"/> and parses it as a float
+	  /// using the invariant culture; returns <paramref name="defaultVal"/> if it is absent.
+	  /// </summary>
+	  protected internal float getFloat(IDictionary<string, string> args, string name, float defaultVal)
+	  {
+		// IDictionary.Remove returns a bool, not the removed value, so look it up first.
+		string s;
+		if (args.TryGetValue(name, out s))
+		{
+		  args.Remove(name);
+		}
+		return s == null ? defaultVal : float.Parse(s, System.Globalization.CultureInfo.InvariantCulture);
+	  }
+
+	  /// <summary>
+	  /// Removes the required parameter <paramref name="name"/> and returns the first char of its value.
+	  /// </summary>
+	  public virtual char requireChar(IDictionary<string, string> args, string name)
+	  {
+		return require(args, name)[0];
+	  }
+	  /// <summary>
+	  /// Removes the optional parameter <paramref name="name"/> and returns it as a char;
+	  /// returns <paramref name="defaultValue"/> if it is absent.
+	  /// </summary>
+	  /// <exception cref="System.ArgumentException"> if the value is not exactly one char long </exception>
+	  public virtual char getChar(IDictionary<string, string> args, string name, char defaultValue)
+	  {
+		// IDictionary.Remove returns a bool, not the removed value, so look it up first.
+		string s;
+		if (args.TryGetValue(name, out s))
+		{
+		  args.Remove(name);
+		}
+		if (s == null)
+		{
+		  return defaultValue;
+		}
+		if (s.Length != 1)
+		{
+		  throw new System.ArgumentException(name + " should be a char. \"" + s + "\" is invalid");
+		}
+		return s[0];
+	  }
+
+	  // One item: a maximal run of characters that are neither commas nor whitespace.
+	  private static readonly System.Text.RegularExpressions.Regex ITEM_PATTERN = new System.Text.RegularExpressions.Regex("[^,\\s]+");
+
+	  /// <summary>
+	  /// Removes <paramref name="name"/> from <paramref name="args"/> and returns its
+	  /// whitespace- and/or comma-separated values as a set, or null if the parameter is
+	  /// absent or contains no item.
+	  /// </summary>
+	  public virtual HashSet<string> getSet(IDictionary<string, string> args, string name)
+	  {
+		// IDictionary.Remove returns a bool, not the removed value, so look it up first.
+		string s;
+		if (args.TryGetValue(name, out s))
+		{
+		  args.Remove(name);
+		}
+		if (s == null)
+		{
+		  return null;
+		}
+
+		// The set is created lazily so that a value without any item still yields null.
+		HashSet<string> set = null;
+		foreach (System.Text.RegularExpressions.Match match in ITEM_PATTERN.Matches(s))
+		{
+		  if (set == null)
+		  {
+			set = new HashSet<string>();
+		  }
+		  set.Add(match.Value);
+		}
+		return set;
+	  }
+
+	  /// <summary>
+	  /// Compiles a pattern for the value of the specified argument key <code>name</code>.
+	  /// The argument is required and is consumed from <paramref name="args"/>.
+	  /// </summary>
+	  /// <exception cref="System.ArgumentException"> if the value is not a valid pattern; the syntax error is attached as the inner exception </exception>
+	  // NOTE(review): Pattern and PatternSyntaxException are the Java regex names; no
+	  // declaration for them is visible in this file - confirm what they map to in the port.
+	  protected internal Pattern GetPattern(IDictionary<string, string> args, string name)
+	  {
+		try
+		{
+		  return Pattern.compile(require(args, name));
+		}
+		catch (PatternSyntaxException e)
+		{
+		  throw new System.ArgumentException("Configuration Error: '" + name + "' can not be parsed in " + this.GetType().Name, e);
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds a <seealso cref="CharArraySet"/> from the lines of every file named in
+	  /// <paramref name="wordFiles"/>, which can be a comma-separated list of filenames.
+	  /// Requires a luceneMatchVersion (checked first via assureMatchVersion).
+	  /// </summary>
+	  /// <param name="loader"> used to open each file </param>
+	  /// <param name="wordFiles"> comma-separated file names; each name is trimmed before loading </param>
+	  /// <param name="ignoreCase"> passed through to the created sets </param>
+	  /// <returns> the combined word set, or null if <paramref name="wordFiles"/> yields no file names </returns>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: protected final CharArraySet getWordSet(ResourceLoader loader, String wordFiles, boolean ignoreCase) throws java.io.IOException
+	  protected internal CharArraySet GetWordSet(ResourceLoader loader, string wordFiles, bool ignoreCase)
+	  {
+		assureMatchVersion();
+		IList<string> files = splitFileNames(wordFiles);
+		CharArraySet words = null;
+		if (files.Count > 0)
+		{
+		  // default stopwords list has 35 or so words, but maybe don't make it that
+		  // big to start
+		  words = new CharArraySet(luceneMatchVersion, files.Count * 10, ignoreCase);
+		  foreach (string file in files)
+		  {
+			IList<string> wlist = getLines(loader, file.Trim());
+			words.addAll(StopFilter.makeStopSet(luceneMatchVersion, wlist, ignoreCase));
+		  }
+		}
+		return words;
+	  }
+
+	  /// <summary>
+	  /// Returns the resource's lines (with content treated as UTF-8).
+	  /// </summary>
+	  /// <param name="loader"> used to open <paramref name="resource"/> </param>
+	  /// <param name="resource"> name of the resource to read </param>
+	  // NOTE(review): StandardCharsets is a Java type; no declaration for it is visible
+	  // in this file - confirm what it maps to in the port.
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: protected final java.util.List<String> getLines(ResourceLoader loader, String resource) throws java.io.IOException
+	  protected internal IList<string> getLines(ResourceLoader loader, string resource)
+	  {
+		return WordlistLoader.getLines(loader.openResource(resource), StandardCharsets.UTF_8);
+	  }
+
+	  /// <summary>
+	  /// Same as <c>GetWordSet</c>, except the input files are in snowball format.
+	  /// Requires a luceneMatchVersion (checked first via assureMatchVersion).
+	  /// </summary>
+	  /// <param name="loader"> used to open each file </param>
+	  /// <param name="wordFiles"> comma-separated file names; each name is trimmed before loading </param>
+	  /// <param name="ignoreCase"> passed through to the created set </param>
+	  /// <returns> the combined word set, or null if <paramref name="wordFiles"/> yields no file names </returns>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: protected final CharArraySet getSnowballWordSet(ResourceLoader loader, String wordFiles, boolean ignoreCase) throws java.io.IOException
+	  protected internal CharArraySet getSnowballWordSet(ResourceLoader loader, string wordFiles, bool ignoreCase)
+	  {
+		assureMatchVersion();
+		IList<string> files = splitFileNames(wordFiles);
+		CharArraySet words = null;
+		if (files.Count > 0)
+		{
+		  // default stopwords list has 35 or so words, but maybe don't make it that
+		  // big to start
+		  words = new CharArraySet(luceneMatchVersion, files.Count * 10, ignoreCase);
+		  foreach (string file in files)
+		  {
+			// NOTE(review): InputStream, Reader, CharsetDecoder, CodingErrorAction and
+			// InputStreamReader are Java I/O types left by the converter - confirm their
+			// .NET counterparts in the port.
+			InputStream stream = null;
+			Reader reader = null;
+			try
+			{
+			  stream = loader.openResource(file.Trim());
+			  // the decoder is configured to REPORT malformed or unmappable input
+			  CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(CodingErrorAction.REPORT);
+			  reader = new InputStreamReader(stream, decoder);
+			  WordlistLoader.getSnowballWordSet(reader, words);
+			}
+			finally
+			{
+			  // always runs, so both handles are closed even when loading fails
+			  IOUtils.closeWhileHandlingException(reader, stream);
+			}
+		  }
+		}
+		return words;
+	  }
+
+	  /// <summary>
+	  /// Splits file names separated by comma character.
+	  /// File names can contain comma characters escaped by backslash '\'
+	  /// </summary>
+	  /// <param name="fileNames"> the string containing file names; may be null </param>
+	  /// <returns> a list of file names with the escaping backslashes removed; empty if <paramref name="fileNames"/> is null </returns>
+	  protected internal IList<string> splitFileNames(string fileNames)
+	  {
+		IList<string> result = new List<string>();
+		if (fileNames == null)
+		{
+		  return result;
+		}
+
+		// Split on every comma that is not preceded by a backslash.
+		string[] parts = System.Text.RegularExpressions.Regex.Split(fileNames, "(?<!\\\\),");
+
+		// Java's String.split drops trailing empty strings once at least one separator
+		// matched; Regex.Split keeps them, so trim them here to preserve that behavior.
+		int count = parts.Length;
+		if (count > 1)
+		{
+		  while (count > 0 && parts[count - 1].Length == 0)
+		  {
+			count--;
+		  }
+		}
+
+		for (int i = 0; i < count; i++)
+		{
+		  // Drop the backslash from each escaped comma.
+		  result.Add(System.Text.RegularExpressions.Regex.Replace(parts[i], "\\\\(?=,)", ""));
+		}
+
+		return result;
+	  }
+
+	  private const string CLASS_NAME = "class";
+
+	  /// <returns> the string used to specify the concrete class name in a serialized representation: the class arg.  
+	  ///         If the concrete class name was not specified via a class arg, returns the simple name of this instance's type. </returns>
+	  public virtual string ClassArg
+	  {
+		  get
+		  {
+			if (null != originalArgs)
+			{
+			  // The dictionary indexer throws KeyNotFoundException for a missing key,
+			  // so probe with TryGetValue to reach the fallback below.
+			  string className;
+			  if (originalArgs.TryGetValue(CLASS_NAME, out className) && null != className)
+			  {
+				return className;
+			  }
+			}
+			return this.GetType().Name;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Whether the luceneMatchVersion arg is explicitly specified in the serialized schema.
+	  /// Defaults to false; nothing in this class sets it other than this property's setter.
+	  /// </summary>
+	  public virtual bool ExplicitLuceneMatchVersion
+	  {
+		  get
+		  {
+			return isExplicitLuceneMatchVersion;
+		  }
+		  set
+		  {
+			this.isExplicitLuceneMatchVersion = value;
+		  }
+	  }
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
new file mode 100644
index 0000000..351446f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
@@ -0,0 +1,165 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Threading;
+using Lucene.Net.Analysis.Util;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using SPIClassIterator = org.apache.lucene.util.SPIClassIterator;
+
+	/// <summary>
+	/// Helper class for loading named SPIs from classpath (e.g. Tokenizers, TokenStreams).
+	/// @lucene.internal
+	/// </summary>
+	internal sealed class AnalysisSPILoader<S> where S : AbstractAnalysisFactory
+	{
+
+	  private volatile IDictionary<string, Type> services = Collections.emptyMap();
+	  private readonly Type clazz;
+	  private readonly string[] suffixes;
+
+	  /// <summary>
+	  /// Creates a loader whose only accepted suffix is the simple name of <paramref name="clazz"/>.
+	  /// </summary>
+	  // NOTE(review): Type.SimpleName, Type.ClassLoader, ClassLoader and
+	  // Thread.ContextClassLoader used by these constructors are Java concepts left by
+	  // the converter - confirm how class discovery is meant to work in the port.
+	  public AnalysisSPILoader(Type clazz) : this(clazz, new string[] {clazz.SimpleName})
+	  {
+	  }
+
+	  /// <summary>
+	  /// Same as the single-argument constructor, but scanning the given class loader.
+	  /// </summary>
+	  public AnalysisSPILoader(Type clazz, ClassLoader loader) : this(clazz, new string[] {clazz.SimpleName}, loader)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a loader with explicit suffixes, scanning the current thread's context class loader.
+	  /// </summary>
+	  public AnalysisSPILoader(Type clazz, string[] suffixes) : this(clazz, suffixes, Thread.CurrentThread.ContextClassLoader)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a loader and immediately loads the available services.
+	  /// </summary>
+	  /// <param name="clazz"> base type of the services to discover </param>
+	  /// <param name="suffixes"> class-name suffixes that are stripped to derive each service name </param>
+	  /// <param name="classloader"> class loader to scan </param>
+	  public AnalysisSPILoader(Type clazz, string[] suffixes, ClassLoader classloader)
+	  {
+		this.clazz = clazz;
+		this.suffixes = suffixes;
+		// if clazz' classloader is not a parent of the given one, we scan clazz's classloader, too:
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final ClassLoader clazzClassloader = clazz.getClassLoader();
+		ClassLoader clazzClassloader = clazz.ClassLoader;
+		if (clazzClassloader != null && !SPIClassIterator.isParentClassLoader(clazzClassloader, classloader))
+		{
+		  reload(clazzClassloader);
+		}
+		reload(classloader);
+	  }
+
+	  /// <summary>
+	  /// Reloads the internal SPI list from the given <c>ClassLoader</c>.
+	  /// The new service map is built on a copy and published in one assignment at the end,
+	  /// so maps handed out earlier are not modified.
+	  /// 
+	  /// <para><b>NOTE:</b> Only new service providers are added, existing ones are
+	  /// never removed or replaced.
+	  /// 
+	  /// </para>
+	  /// <para><em>This method is expensive and should only be called for discovery
+	  /// of new service providers on the given classpath/classloader!</em>
+	  /// </para>
+	  /// </summary>
+	  /// <param name="classloader"> class loader to scan for service classes </param>
+	  // NOTE(review): LinkedHashMap, SPIClassIterator, ServiceConfigurationError, Arrays,
+	  // Locale.ROOT and Type.SimpleName are Java names left by the converter - confirm their
+	  // counterparts in the port. Locking on "this" is also worth revisiting.
+	  public void reload(ClassLoader classloader)
+	  {
+		  lock (this)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final java.util.LinkedHashMap<String,Class> services = new java.util.LinkedHashMap<>(this.services);
+			// work on a copy; this local deliberately shadows the field until it is published below
+			LinkedHashMap<string, Type> services = new LinkedHashMap<string, Type>(this.services);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.SPIClassIterator<S> loader = org.apache.lucene.util.SPIClassIterator.get(clazz, classloader);
+			SPIClassIterator<S> loader = SPIClassIterator.get(clazz, classloader);
+			while (loader.hasNext())
+			{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final Class service = loader.next();
+			  Type service = loader.next();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final String clazzName = service.getSimpleName();
+			  string clazzName = service.SimpleName;
+			  // service name = class name minus the first matching suffix, lower-cased
+			  string name = null;
+			  foreach (string suffix in suffixes)
+			  {
+				if (clazzName.EndsWith(suffix, StringComparison.Ordinal))
+				{
+				  name = clazzName.Substring(0, clazzName.Length - suffix.Length).ToLower(Locale.ROOT);
+				  break;
+				}
+			  }
+			  if (name == null)
+			  {
+				throw new ServiceConfigurationError("The class name " + service.Name + " has wrong suffix, allowed are: " + Arrays.ToString(suffixes));
+			  }
+			  // only add the first one for each name, later services will be ignored
+			  // this allows to place services before others in classpath to make 
+			  // them used instead of others
+			  //
+			  // TODO: Should we disallow duplicate names here?
+			  // Allowing it may get confusing on collisions, as different packages
+			  // could contain same factory class, which is a naming bug!
+			  // When changing this be careful to allow reload()!
+			  if (!services.containsKey(name))
+			  {
+				services.put(name, service);
+			  }
+			}
+			// publish the finished map through the volatile field
+			this.services = Collections.unmodifiableMap(services);
+		  }
+	  }
+
+	  /// <summary>
+	  /// Creates the service registered under <paramref name="name"/>, passing
+	  /// <paramref name="args"/> as the single constructor argument.
+	  /// </summary>
+	  /// <param name="name"> service name, looked up via lookupClass </param>
+	  /// <param name="args"> argument map handed to the service's constructor </param>
+	  /// <exception cref="System.ArgumentException"> if the name is unknown or the service cannot be constructed;
+	  /// a construction failure is attached as the inner exception </exception>
+	  public S newInstance(string name, IDictionary<string, string> args)
+	  {
+		Type service = lookupClass(name);
+		try
+		{
+		  // .NET counterpart of Java's getConstructor(...).newInstance(args): picks the
+		  // public constructor that accepts the argument map.
+		  return (S)Activator.CreateInstance(service, new object[] { args });
+		}
+		catch (Exception e)
+		{
+		  throw new System.ArgumentException("SPI class of type " + clazz.Name + " with name '" + name + "' cannot be instantiated. " + "This is likely due to a misconfiguration of the java class '" + service.Name + "': ", e);
+		}
+	  }
+
+	  /// <summary>
+	  /// Returns the service type registered under <paramref name="name"/>; the lookup is case-insensitive.
+	  /// </summary>
+	  /// <exception cref="System.ArgumentException"> if no service with that name is registered;
+	  /// the message lists the names that are available </exception>
+	  public Type lookupClass(string name)
+	  {
+		// Read the volatile field once so the lookup and the error message see the same map.
+		IDictionary<string, Type> snapshot = services;
+
+		// The dictionary indexer throws for a missing key, so probe with TryGetValue.
+		// Names are stored lower-cased, so the key is lower-cased culture-independently.
+		Type service;
+		if (snapshot.TryGetValue(name.ToLowerInvariant(), out service) && service != null)
+		{
+		  return service;
+		}
+
+		// Join the names; concatenating the collection would print only its type name.
+		throw new System.ArgumentException("A SPI class of type " + clazz.Name + " with name '" + name + "' does not exist. " + "You need to add the corresponding JAR file supporting this SPI to your classpath. " + "The current classpath supports the following names: [" + string.Join(", ", snapshot.Keys) + "]");
+	  }
+
+	  /// <summary>
+	  /// Returns the names of all currently registered services.
+	  /// </summary>
+	  /// <returns> a new set holding a copy of the names; changing it does not affect this loader </returns>
+	  public HashSet<string> availableServices()
+	  {
+		// IDictionary.Keys is an ICollection<string>, not a HashSet<string>, so copy it.
+		return new HashSet<string>(services.Keys);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayIterator.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayIterator.cs
new file mode 100644
index 0000000..1d1c44b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayIterator.cs
@@ -0,0 +1,278 @@
+using System;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	/// <summary>
+	/// A CharacterIterator over a region of a <c>char[]</c>, used internally together with
+	/// <seealso cref="BreakIterator"/>. Every character handed out passes through
+	/// <c>jreBugWorkaround</c>, which concrete subclasses use to hide characters that a
+	/// buggy break iterator cannot handle.
+	/// @lucene.internal
+	/// </summary>
+	public abstract class CharArrayIterator : CharacterIterator
+	{
+	  // backing buffer; never copied
+	  private char[] array;
+	  // absolute offset of the first character of the region
+	  private int start;
+	  // absolute offset of the current character
+	  private int index;
+	  // number of characters in the region
+	  private int length;
+	  // absolute offset one past the last character (start + length)
+	  private int limit;
+
+	  /// <summary>
+	  /// The buffer currently being iterated (as passed to <c>setText</c>). </summary>
+	  public virtual char [] Text
+	  {
+		  get
+		  {
+			return array;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Offset into the buffer at which the region starts. </summary>
+	  public virtual int Start
+	  {
+		  get
+		  {
+			return start;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Number of characters in the region. </summary>
+	  public virtual int Length
+	  {
+		  get
+		  {
+			return length;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Set a new region of text to be examined by this iterator.
+	  /// The iterator is positioned on the first character of the region.
+	  /// </summary>
+	  /// <param name="array"> text buffer to examine </param>
+	  /// <param name="start"> offset into buffer </param>
+	  /// <param name="length"> maximum length to examine </param>
+	  public virtual void setText(char[] array, int start, int length)
+	  {
+		this.array = array;
+		this.start = start;
+		this.index = start;
+		this.length = length;
+		this.limit = start + length;
+	  }
+
+	  /// <summary>
+	  /// Returns the character at the current position, or DONE when positioned at the end. </summary>
+	  public override char current()
+	  {
+		return (index == limit) ? DONE : jreBugWorkaround(array[index]);
+	  }
+
+	  /// <summary>
+	  /// Hook that maps a raw buffer character to the character reported to the caller. </summary>
+	  protected internal abstract char jreBugWorkaround(char ch);
+
+	  /// <summary>
+	  /// Moves to the first character of the region and returns it. </summary>
+	  public override char first()
+	  {
+		index = start;
+		return current();
+	  }
+
+	  /// <summary>
+	  /// Indexes exposed by this iterator are relative to the region, so this is always 0. </summary>
+	  public override int BeginIndex
+	  {
+		  get
+		  {
+			return 0;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Region-relative end index, i.e. the region length. </summary>
+	  public override int EndIndex
+	  {
+		  get
+		  {
+			return length;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Region-relative index of the current position. </summary>
+	  public override int Index
+	  {
+		  get
+		  {
+			return index - start;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Moves to the last character of the region (or to the end if the region is empty)
+	  /// and returns the character there. </summary>
+	  public override char last()
+	  {
+		index = (limit == start) ? limit : limit - 1;
+		return current();
+	  }
+
+	  /// <summary>
+	  /// Advances by one character; returns DONE and stays at the end once the region is exhausted. </summary>
+	  public override char next()
+	  {
+		if (++index >= limit)
+		{
+		  index = limit;
+		  return DONE;
+		}
+		else
+		{
+		  return current();
+		}
+	  }
+
+	  /// <summary>
+	  /// Steps back by one character; returns DONE and stays at the start when already there. </summary>
+	  public override char previous()
+	  {
+		if (--index < start)
+		{
+		  index = start;
+		  return DONE;
+		}
+		else
+		{
+		  return current();
+		}
+	  }
+
+	  /// <summary>
+	  /// Moves to the given region-relative position and returns the character there.
+	  /// </summary>
+	  /// <param name="position"> position between BeginIndex and EndIndex (both inclusive) </param>
+	  /// <exception cref="System.ArgumentException"> if the position is outside that range </exception>
+	  public override char setIndex(int position)
+	  {
+		if (position < BeginIndex || position > EndIndex)
+		{
+		  throw new System.ArgumentException("Illegal Position: " + position);
+		}
+		index = start + position;
+		return current();
+	  }
+
+	  /// <summary>
+	  /// Returns a shallow copy of this iterator; the copy shares the text buffer. </summary>
+	  public override CharArrayIterator clone()
+	  {
+		// Object.MemberwiseClone() is the .NET counterpart of Java's Object.clone():
+		// a field-by-field copy that cannot fail, so no exception translation is needed.
+		return (CharArrayIterator) MemberwiseClone();
+	  }
+
+	  /// <summary>
+	  /// Create a new CharArrayIterator that works around JRE bugs
+	  /// in a manner suitable for <seealso cref="BreakIterator#getSentenceInstance()"/>
+	  /// </summary>
+	  public static CharArrayIterator newSentenceInstance()
+	  {
+		if (HAS_BUGGY_BREAKITERATORS)
+		{
+		  return new CharArrayIteratorAnonymousInnerClassHelper();
+		}
+		else
+		{
+		  return new CharArrayIteratorAnonymousInnerClassHelper2();
+		}
+	  }
+
+	  private class CharArrayIteratorAnonymousInnerClassHelper : CharArrayIterator
+	  {
+		  public CharArrayIteratorAnonymousInnerClassHelper()
+		  {
+		  }
+
+			  // work around this for now by lying about all surrogates to 
+			  // the sentence tokenizer, instead we treat them all as 
+			  // SContinue so we won't break around them.
+		  protected internal override char jreBugWorkaround(char ch)
+		  {
+			// explicit char cast: an int literal and a char have the common type int,
+			// which cannot be returned from a char method
+			return ch >= 0xD800 && ch <= 0xDFFF ? (char) 0x002C : ch;
+		  }
+	  }
+
+	  private class CharArrayIteratorAnonymousInnerClassHelper2 : CharArrayIterator
+	  {
+		  public CharArrayIteratorAnonymousInnerClassHelper2()
+		  {
+		  }
+
+			  // no bugs
+		  protected internal override char jreBugWorkaround(char ch)
+		  {
+			return ch;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Create a new CharArrayIterator that works around JRE bugs
+	  /// in a manner suitable for <seealso cref="BreakIterator#getWordInstance()"/>
+	  /// </summary>
+	  public static CharArrayIterator newWordInstance()
+	  {
+		if (HAS_BUGGY_BREAKITERATORS)
+		{
+		  return new CharArrayIteratorAnonymousInnerClassHelper3();
+		}
+		else
+		{
+		  return new CharArrayIteratorAnonymousInnerClassHelper4();
+		}
+	  }
+
+	  private class CharArrayIteratorAnonymousInnerClassHelper3 : CharArrayIterator
+	  {
+		  public CharArrayIteratorAnonymousInnerClassHelper3()
+		  {
+		  }
+
+			  // work around this for now by lying about all surrogates to the word, 
+			  // instead we treat them all as ALetter so we won't break around them.
+		  protected internal override char jreBugWorkaround(char ch)
+		  {
+			// explicit char cast: see the sentence variant, the conditional would be int otherwise
+			return ch >= 0xD800 && ch <= 0xDFFF ? (char) 0x0041 : ch;
+		  }
+	  }
+
+	  private class CharArrayIteratorAnonymousInnerClassHelper4 : CharArrayIterator
+	  {
+		  public CharArrayIteratorAnonymousInnerClassHelper4()
+		  {
+		  }
+
+			  // no bugs
+		  protected internal override char jreBugWorkaround(char ch)
+		  {
+			return ch;
+		  }
+	  }
+
+	  /// <summary>
+	  /// True if this JRE has a buggy BreakIterator implementation
+	  /// </summary>
+	  public static readonly bool HAS_BUGGY_BREAKITERATORS;
+	  static CharArrayIterator()
+	  {
+		// Probe once: feed a supplementary character (a surrogate pair) to a sentence
+		// BreakIterator. Any exception marks the implementation as buggy; the exception
+		// itself is deliberately discarded because only the outcome matters.
+		bool v;
+		try
+		{
+		  BreakIterator bi = BreakIterator.getSentenceInstance(Locale.US);
+		  bi.Text = "\udb40\udc53";
+		  bi.next();
+		  v = false;
+		}
+		catch (Exception)
+		{
+		  v = true;
+		}
+		HAS_BUGGY_BREAKITERATORS = v;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
new file mode 100644
index 0000000..1086572
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
@@ -0,0 +1,928 @@
+using System;
+using System.Diagnostics;
+using System.Collections;
+using System.Collections.Generic;
+using System.Text;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using Version = org.apache.lucene.util.Version;
+
+
+	/// <summary>
+	/// A simple class that stores key Strings as char[]'s in a
+	/// hash table. Note that this is not a general purpose
+	/// class.  For example, it cannot remove items from the
+	/// map, nor does it resize its hash table to be smaller,
+	/// etc.  It is designed to be quick to retrieve items
+	/// by char[] keys without the necessity of converting
+	/// to a String first.
+	/// 
+	/// <a name="version"></a>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating <seealso cref="CharArrayMap"/>:
+	/// <ul>
+	///   <li> As of 3.1, supplementary characters are
+	///       properly lowercased.</li>
+	/// </ul>
+	/// Before 3.1 supplementary characters could not be
+	/// lowercased correctly due to the lack of Unicode 4
+	/// support in JDK 1.4. To use instances of
+	/// <seealso cref="CharArrayMap"/> with the behavior before Lucene
+	/// 3.1 pass a <seealso cref="Version"/> &lt; 3.1 to the constructors.
+	/// </para>
+	/// </summary>
+	public class CharArrayMap<V> : AbstractMap<object, V>
+	{
+	  // Shared empty, unmodifiable instance. Static members of a generic class exist once
+	  // per closed type, so the Java wildcard (CharArrayMap<?>) is expressed with this
+	  // class's own type parameter.
+	  private static readonly CharArrayMap<V> EMPTY_MAP = new EmptyCharArrayMap<V>();
+
+	  // initial (and minimum) size of the hash table
+	  private const int INIT_SIZE = 8;
+	  private readonly CharacterUtils charUtils;
+	  // true if keys are matched case-insensitively
+	  private bool ignoreCase;
+	  // number of mappings currently stored
+	  private int count;
+	  internal readonly Version matchVersion; // package private because used in CharArraySet
+	  internal char[][] keys; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
+	  internal V[] values; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
+
+	  /// <summary>
+	  /// Create map with enough capacity to hold startSize terms
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          compatibility match version see <a href="#version">Version
+	  ///          note</a> above for details. </param>
+	  /// <param name="startSize">
+	  ///          the initial capacity </param>
+	  /// <param name="ignoreCase">
+	  ///          <code>false</code> if and only if the set should be case sensitive
+	  ///          otherwise <code>true</code>. </param>
+	  public CharArrayMap(Version matchVersion, int startSize, bool ignoreCase)
+	  {
+		this.ignoreCase = ignoreCase;
+		// Start at INIT_SIZE and double until the table exceeds startSize by 25%.
+		int size_Renamed = INIT_SIZE;
+		while (startSize + (startSize >> 2) > size_Renamed)
+		{
+		  size_Renamed <<= 1;
+		}
+		keys = new char[size_Renamed][];
+		// C# can allocate arrays of a type parameter directly; the Java idiom of casting
+		// an object[] to V[] is not a valid conversion here.
+		values = new V[size_Renamed];
+		this.charUtils = CharacterUtils.getInstance(matchVersion);
+		this.matchVersion = matchVersion;
+	  }
+
+	  /// <summary>
+	  /// Creates a map from the mappings in another map. 
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          compatibility match version see <a href="#version">Version
+	  ///          note</a> above for details. </param>
+	  /// <param name="c">
+	  ///          a map whose mappings to be copied </param>
+	  /// <param name="ignoreCase">
+	  ///          <code>false</code> if and only if the set should be case sensitive
+	  ///          otherwise <code>true</code>. </param>
+	  // C# constructors cannot declare type parameters or constraints, so the Java
+	  // "Map<?, ? extends V>" parameter is expressed with the key/value types this class
+	  // itself uses (object keys, V values).
+	  // NOTE(review): confirm IDictionary<object, V> suits the intended callers.
+	  public CharArrayMap(Version matchVersion, IDictionary<object, V> c, bool ignoreCase) : this(matchVersion, c.Count, ignoreCase)
+	  {
+		putAll(c);
+	  }
+
+	  /// <summary>
+	  /// Creates a map that shares the state of <paramref name="toCopy"/>, including its key
+	  /// and value tables (used internally for readonly maps...) </summary>
+	  private CharArrayMap(CharArrayMap<V> toCopy)
+	  {
+		// configuration
+		this.matchVersion = toCopy.matchVersion;
+		this.charUtils = toCopy.charUtils;
+		this.ignoreCase = toCopy.ignoreCase;
+		// storage: the arrays are shared with the source map, not copied
+		this.count = toCopy.count;
+		this.keys = toCopy.keys;
+		this.values = toCopy.values;
+	  }
+
+	  /// <summary>
+	  /// Clears all entries in this map. This method is supported for reusing, but not <seealso cref="Map#remove"/>. </summary>
+	  public override void clear()
+	  {
+		count = 0;
+		// Array.Clear resets every slot to its default value (null for keys, default(V)
+		// for values); it replaces Java's Arrays.fill(array, null).
+		Array.Clear(keys, 0, keys.Length);
+		Array.Clear(values, 0, values.Length);
+	  }
+
+	  /// <summary>
+	  /// Tells whether the <code>len</code> chars of <code>text</code> beginning at
+	  /// <code>off</code> form a key of this map.
+	  /// </summary>
+	  public virtual bool containsKey(char[] text, int off, int len)
+	  {
+		// the slot found for the text holds a key only if the text is already mapped
+		int slot = getSlot(text, off, len);
+		char[] stored = keys[slot];
+		return stored != null;
+	  }
+
+	  /// <summary>
+	  /// Tells whether the given <code>CharSequence</code> is a key of this map. </summary>
+	  public virtual bool containsKey(CharSequence cs)
+	  {
+		// the slot found for the text holds a key only if the text is already mapped
+		int slot = getSlot(cs);
+		char[] stored = keys[slot];
+		return stored != null;
+	  }
+
+	  /// <summary>
+	  /// Tells whether <paramref name="o"/> is a key of this map. A <c>char[]</c> is used
+	  /// as is; any other object is looked up by the characters of its string form.
+	  /// </summary>
+	  public override bool containsKey(object o)
+	  {
+		char[] text = o as char[];
+		if (text == null)
+		{
+		  // System.String does not implement CharSequence, so containsKey(o.ToString())
+		  // can bind back to this object overload and recurse forever (unless CharSequence
+		  // offers an implicit conversion from string). Looking the characters up through
+		  // the char[] overload avoids that ambiguity; both lookups hash and compare the
+		  // same characters.
+		  text = o.ToString().ToCharArray();
+		}
+		return containsKey(text, 0, text.Length);
+	  }
+
+	  /// <summary>
+	  /// Returns the value stored in the slot that belongs to the <code>len</code> chars of
+	  /// <code>text</code> beginning at <code>off</code>.
+	  /// </summary>
+	  public virtual V get(char[] text, int off, int len)
+	  {
+		int slot = getSlot(text, off, len);
+		return values[slot];
+	  }
+
+	  /// <summary>
+	  /// Returns the value stored in the slot that belongs to the chars of the given {@code CharSequence}. </summary>
+	  public virtual V get(CharSequence cs)
+	  {
+		int slot = getSlot(cs);
+		return values[slot];
+	  }
+
+	  /// <summary>
+	  /// Returns the value mapped to <paramref name="o"/>. A <c>char[]</c> is used as is;
+	  /// any other object is looked up by the characters of its string form.
+	  /// </summary>
+	  public override V get(object o)
+	  {
+		char[] text = o as char[];
+		if (text == null)
+		{
+		  // System.String does not implement CharSequence, so get(o.ToString()) can bind
+		  // back to this object overload and recurse forever (unless CharSequence offers
+		  // an implicit conversion from string). Looking the characters up through the
+		  // char[] overload avoids that ambiguity; both lookups hash and compare the same
+		  // characters.
+		  text = o.ToString().ToCharArray();
+		}
+		return get(text, 0, text.Length);
+	  }
+
+	  /// <summary>
+	  /// Finds the table slot for the given text: either the slot that already holds an
+	  /// equal key, or the first empty slot met while probing. </summary>
+	  private int getSlot(char[] text, int off, int len)
+	  {
+		int hash = getHashCode(text, off, len);
+		int mask = keys.Length - 1;
+		int slot = hash & mask;
+		char[] candidate = keys[slot];
+		if (candidate == null || Equals(text, off, len, candidate))
+		{
+		  return slot;
+		}
+		// collision: keep probing with an odd step derived from the hash
+		int step = ((hash >> 8) + hash) | 1;
+		while (true)
+		{
+		  hash += step;
+		  slot = hash & mask;
+		  candidate = keys[slot];
+		  if (candidate == null || Equals(text, off, len, candidate))
+		  {
+			return slot;
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Finds the table slot for the given text: either the slot that already holds an
+	  /// equal key, or the first empty slot met while probing. </summary>
+	  private int getSlot(CharSequence text)
+	  {
+		int hash = getHashCode(text);
+		int mask = keys.Length - 1;
+		int slot = hash & mask;
+		char[] candidate = keys[slot];
+		if (candidate == null || Equals(text, candidate))
+		{
+		  return slot;
+		}
+		// collision: keep probing with an odd step derived from the hash
+		int step = ((hash >> 8) + hash) | 1;
+		while (true)
+		{
+		  hash += step;
+		  slot = hash & mask;
+		  candidate = keys[slot];
+		  if (candidate == null || Equals(text, candidate))
+		  {
+			return slot;
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Maps the characters of <paramref name="text"/> to <paramref name="value"/>. </summary>
+	  public virtual V put(CharSequence text, V value)
+	  {
+		// goes through the string overload; could be more efficient
+		string key = text.ToString();
+		return put(key, value);
+	  }
+
+	  /// <summary>
+	  /// Maps <paramref name="o"/> to <paramref name="value"/>. A <c>char[]</c> key is stored
+	  /// directly; any other object is stored under its string form. </summary>
+	  public override V put(object o, V value)
+	  {
+		char[] chars = o as char[];
+		return chars != null ? put(chars, value) : put(o.ToString(), value);
+	  }
+
+	  /// <summary>
+	  /// Maps the characters of <paramref name="text"/> to <paramref name="value"/>. </summary>
+	  public virtual V put(string text, V value)
+	  {
+		// a fresh array, so the map can keep (and lower-case) it freely
+		char[] key = text.ToCharArray();
+		return put(key, value);
+	  }
+
+	  /// <summary>
+	  /// Add the given mapping.
+	  /// If ignoreCase is true for this Set, the text array will be directly modified.
+	  /// The user should never modify this text array after calling this method.
+	  /// </summary>
+	  /// <param name="text"> key characters; stored by reference, not copied </param>
+	  /// <param name="value"> value to associate with the key </param>
+	  /// <returns> the value previously mapped to the key, or <c>default(V)</c> if the
+	  ///         key was not present </returns>
+	  public virtual V put(char[] text, V value)
+	  {
+		if (ignoreCase)
+		{
+		  charUtils.ToLower(text, 0, text.Length);
+		}
+		int slot = getSlot(text, 0, text.Length);
+		if (keys[slot] != null)
+		{
+		  // key already present: replace the value only
+		  V oldValue = values[slot];
+		  values[slot] = value;
+		  return oldValue;
+		}
+		keys[slot] = text;
+		values[slot] = value;
+		count++;
+
+		// grow once the table is more than 80% full (count * 1.25 > capacity)
+		if (count + (count >> 2) > keys.Length)
+		{
+		  rehash();
+		}
+
+		// "null" is not a valid value for an unconstrained type parameter
+		return default(V);
+	  }
+
+	  /// <summary>
+	  /// Doubles the size of the hash table and re-inserts every existing mapping. </summary>
+	  private void rehash()
+	  {
+		Debug.Assert(keys.Length == values.Length);
+		int newSize = 2 * keys.Length;
+		char[][] oldkeys = keys;
+		V[] oldvalues = values;
+		keys = new char[newSize][];
+		// allocate the generic array directly; casting an object[] to V[] is not valid in C#
+		values = new V[newSize];
+
+		for (int i = 0; i < oldkeys.Length; i++)
+		{
+		  char[] text = oldkeys[i];
+		  if (text != null)
+		  {
+			// todo: could be faster... no need to compare strings on collision
+			int slot = getSlot(text,0,text.Length);
+			keys[slot] = text;
+			values[slot] = oldvalues[i];
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Lower-cases a single code point, culture-independently. Replaces Java's
+	  /// Character.toLowerCase(int), which has no System.Char counterpart taking an int. </summary>
+	  private static int toLowerCaseCodePoint(int codePoint)
+	  {
+		if (codePoint < 0x10000)
+		{
+		  // BMP value (possibly a lone surrogate, which lower-cases to itself)
+		  return char.ToLowerInvariant((char) codePoint);
+		}
+		// supplementary code point: round-trip through its UTF-16 string form
+		return char.ConvertToUtf32(char.ConvertFromUtf32(codePoint).ToLowerInvariant(), 0);
+	  }
+
+	  /// <summary>
+	  /// Number of UTF-16 code units needed for a code point (Java's Character.charCount). </summary>
+	  private static int charCount(int codePoint)
+	  {
+		return codePoint >= 0x10000 ? 2 : 1;
+	  }
+
+	  /// <summary>
+	  /// Compares <c>len</c> chars of <c>text1</c> starting at <c>off</c> with the stored key
+	  /// <c>text2</c>. With ignoreCase, <c>text1</c> is lower-cased code point by code point
+	  /// before comparing; <c>text2</c> is compared as stored. </summary>
+	  private bool Equals(char[] text1, int off, int len, char[] text2)
+	  {
+		if (len != text2.Length)
+		{
+		  return false;
+		}
+		int limit = off + len;
+		if (ignoreCase)
+		{
+		  for (int i = 0;i < len;)
+		  {
+			int codePointAt = charUtils.codePointAt(text1, off + i, limit);
+			if (toLowerCaseCodePoint(codePointAt) != charUtils.codePointAt(text2, i, text2.Length))
+			{
+			  return false;
+			}
+			i += charCount(codePointAt);
+		  }
+		}
+		else
+		{
+		  for (int i = 0;i < len;i++)
+		  {
+			if (text1[off + i] != text2[i])
+			{
+			  return false;
+			}
+		  }
+		}
+		return true;
+	  }
+
+	  /// <summary>
+	  /// Compares the chars of <c>text1</c> with the stored key <c>text2</c>. With ignoreCase,
+	  /// <c>text1</c> is lower-cased code point by code point before comparing. </summary>
+	  private bool Equals(CharSequence text1, char[] text2)
+	  {
+		int len = text1.length();
+		if (len != text2.Length)
+		{
+		  return false;
+		}
+		if (ignoreCase)
+		{
+		  for (int i = 0;i < len;)
+		  {
+			int codePointAt = charUtils.codePointAt(text1, i);
+			if (toLowerCaseCodePoint(codePointAt) != charUtils.codePointAt(text2, i, text2.Length))
+			{
+			  return false;
+			}
+			i += charCount(codePointAt);
+		  }
+		}
+		else
+		{
+		  for (int i = 0;i < len;i++)
+		  {
+			if (text1.charAt(i) != text2[i])
+			{
+			  return false;
+			}
+		  }
+		}
+		return true;
+	  }
+
+	  /// <summary>
+	  /// Computes the hash (31-based polynomial) of <c>len</c> chars of <c>text</c> starting
+	  /// at <c>offset</c>; with ignoreCase the lower-cased code points are hashed. </summary>
+	  private int getHashCode(char[] text, int offset, int len)
+	  {
+		if (text == null)
+		{
+		  throw new System.NullReferenceException();
+		}
+		int code = 0;
+		int stop = offset + len;
+		if (ignoreCase)
+		{
+		  for (int i = offset; i < stop;)
+		  {
+			int codePointAt = charUtils.codePointAt(text, i, stop);
+			code = code * 31 + toLowerCaseCodePoint(codePointAt);
+			i += charCount(codePointAt);
+		  }
+		}
+		else
+		{
+		  for (int i = offset; i < stop; i++)
+		  {
+			code = code * 31 + text[i];
+		  }
+		}
+		return code;
+	  }
+
+	  /// <summary>
+	  /// Computes the hash (31-based polynomial) of the chars of <c>text</c>; with ignoreCase
+	  /// the lower-cased code points are hashed. </summary>
+	  private int getHashCode(CharSequence text)
+	  {
+		if (text == null)
+		{
+		  throw new System.NullReferenceException();
+		}
+		int code = 0;
+		int len = text.length();
+		if (ignoreCase)
+		{
+		  for (int i = 0; i < len;)
+		  {
+			int codePointAt = charUtils.codePointAt(text, i);
+			code = code * 31 + toLowerCaseCodePoint(codePointAt);
+			i += charCount(codePointAt);
+		  }
+		}
+		else
+		{
+		  for (int i = 0; i < len; i++)
+		  {
+			code = code * 31 + text.charAt(i);
+		  }
+		}
+		return code;
+	  }
+
+	  /// <summary>
+	  /// Removal is not supported by this map; always throws. </summary>
+	  /// <exception cref="System.NotSupportedException"> always </exception>
+	  public override V remove(object key)
+	  {
+		throw new System.NotSupportedException();
+	  }
+
+	  /// <summary>
+	  /// Returns the number of mappings stored in this map. </summary>
+	  public override int size()
+	  {
+		return count;
+	  }
+
+	  /// <summary>
+	  /// Renders the map as its entries, comma-separated, inside curly braces. </summary>
+	  public override string ToString()
+	  {
+		StringBuilder text = new StringBuilder("{");
+		foreach (KeyValuePair<object, V> mapping in entrySet())
+		{
+		  // anything beyond the opening brace means an entry was already written
+		  bool needsSeparator = text.Length > 1;
+		  if (needsSeparator)
+		  {
+			text.Append(", ");
+		  }
+		  text.Append(mapping);
+		}
+		text.Append('}');
+		return text.ToString();
+	  }
+
+	  // lazily created views, cached by entrySet() and keySet()
+	  private EntrySet entrySet_Renamed = null;
+	  private CharArraySet keySet_Renamed = null;
+
+	  /// <summary>
+	  /// Creates the entry set view for this map; this implementation returns a view that
+	  /// allows modification. </summary>
+	  internal virtual EntrySet createEntrySet()
+	  {
+		return new EntrySet(this, true);
+	  }
+
+	  /// <summary>
+	  /// Returns the entry set view of this map. </summary>
+	  public override EntrySet entrySet()
+	  {
+		// created on first request through createEntrySet() and cached afterwards
+		return entrySet_Renamed ?? (entrySet_Renamed = createEntrySet());
+	  }
+
+	  // helper for CharArraySet to not produce endless recursion
+	  /// <summary>
+	  /// Returns the key collection of the base class. </summary>
+	  // NOTE(review): whether base.Keys is actually a HashSet<object> depends on AbstractMap,
+	  // which is not visible here - confirm.
+	  internal HashSet<object> originalKeySet()
+	  {
+		return base.Keys;
+	  }
+
+	  /// <summary>
+	  /// Returns an <seealso cref="CharArraySet"/> view on the map's keys.
+	  /// The set will use the same {@code matchVersion} as this map. 
+	  /// The view is created on first use and cached; its add methods throw
+	  /// <see cref="System.NotSupportedException"/>.
+	  /// </summary>
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @Override @SuppressWarnings({"unchecked","rawtypes"}) public final CharArraySet keySet()
+	  public override CharArraySet keySet()
+	  {
+		if (keySet_Renamed == null)
+		{
+		  // prevent adding of entries
+		  // NOTE(review): "(CharArrayMap) this" is a Java raw-type cast and is not valid C#
+		  // (CharArrayMap requires a type argument); the right replacement depends on the
+		  // CharArraySet constructor, which is not visible here.
+		  keySet_Renamed = new CharArraySetAnonymousInnerClassHelper(this, (CharArrayMap) this);
+		}
+		return keySet_Renamed;
+	  }
+
+	  /// <summary>
+	  /// Key set view handed out by keySet(): a <seealso cref="CharArraySet"/> whose add
+	  /// methods all throw <see cref="System.NotSupportedException"/>, so entries cannot be
+	  /// added through the view. </summary>
+	  private class CharArraySetAnonymousInnerClassHelper : CharArraySet
+	  {
+		  private readonly CharArrayMap<V> outerInstance;
+
+		  // NOTE(review): the second parameter "CharArrayMap (CharArrayMap) this" is converter
+		  // residue (a cast expression used as a parameter declaration) and is not valid C#;
+		  // it has to become a properly typed parameter passed on to the base constructor.
+		  public CharArraySetAnonymousInnerClassHelper(CharArrayMap<V> outerInstance, CharArrayMap (CharArrayMap) this) : base((CharArrayMap) this)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  // adding through the key view is rejected for every overload
+		  public override bool add(object o)
+		  {
+			throw new System.NotSupportedException();
+		  }
+		  public override bool add(CharSequence text)
+		  {
+			throw new System.NotSupportedException();
+		  }
+		  public override bool add(string text)
+		  {
+			throw new System.NotSupportedException();
+		  }
+		  public override bool add(char[] text)
+		  {
+			throw new System.NotSupportedException();
+		  }
+	  }
+
+	  /// <summary>
+	  /// public iterator class so efficient methods are exposed to users.
+	  /// Walks the key table of the owning map and skips empty slots. </summary>
+	  // NOTE(review): the class declares IEnumerator<KeyValuePair<object, V>> but still exposes
+	  // the Java iterator protocol (hasNext/next/remove, marked "override" without a base
+	  // class); MoveNext/Current/Reset/Dispose are not implemented yet.
+	  public class EntryIterator : IEnumerator<KeyValuePair<object, V>>
+	  {
+		  private readonly CharArrayMap<V> outerInstance;
+
+		// slot of the next entry to hand out (keys.Length once exhausted)
+		internal int pos = -1;
+		// slot of the entry handed out last
+		internal int lastPos;
+		// whether setValue is permitted
+		internal readonly bool allowModify;
+
+		internal EntryIterator(CharArrayMap<V> outerInstance, bool allowModify)
+		{
+			this.outerInstance = outerInstance;
+		  this.allowModify = allowModify;
+		  // position on the first occupied slot
+		  goNext();
+		}
+
+		/// <summary>
+		/// Remembers the current slot in lastPos and advances pos to the next occupied slot. </summary>
+		internal virtual void goNext()
+		{
+		  lastPos = pos;
+		  pos++;
+		  while (pos < outerInstance.keys.Length && outerInstance.keys[pos] == null)
+		  {
+			  pos++;
+		  }
+		}
+
+		/// <summary>
+		/// true while there is another occupied slot to hand out </summary>
+		public override bool hasNext()
+		{
+		  return pos < outerInstance.keys.Length;
+		}
+
+		/// <summary>
+		/// gets the next key... do not modify the returned char[] </summary>
+		public virtual char[] nextKey()
+		{
+		  goNext();
+		  return outerInstance.keys[lastPos];
+		}
+
+		/// <summary>
+		/// gets the next key as a newly created String object </summary>
+		public virtual string nextKeyString()
+		{
+		  return new string(nextKey());
+		}
+
+		/// <summary>
+		/// returns the value associated with the last key returned </summary>
+		public virtual V currentValue()
+		{
+		  return outerInstance.values[lastPos];
+		}
+
+		/// <summary>
+		/// sets the value associated with the last key returned </summary>
+		/// <returns> the value that was stored before </returns>
+		/// <exception cref="System.NotSupportedException"> if this iterator does not allow modification </exception>
+		public virtual V setValue(V value)
+		{
+		  if (!allowModify)
+		  {
+			throw new System.NotSupportedException();
+		  }
+		  V old = outerInstance.values[lastPos];
+		  outerInstance.values[lastPos] = value;
+		  return old;
+		}
+
+		/// <summary>
+		/// use nextKey() + currentValue() for better efficiency. </summary>
+		public override KeyValuePair<object, V> next()
+		{
+		  goNext();
+		  return new MapEntry(outerInstance, lastPos, allowModify);
+		}
+
+		/// <summary>
+		/// Removal is not supported; always throws. </summary>
+		public override void remove()
+		{
+		  throw new System.NotSupportedException();
+		}
+	  }
+
+	  /// <summary>
+	  /// Entry view onto one slot of the owning map's key/value tables. </summary>
+	  // NOTE(review): KeyValuePair<object, V> is a struct in .NET and cannot serve as a base
+	  // type; the declaration is kept as ported and needs a design decision.
+	  private sealed class MapEntry : KeyValuePair<object, V>
+	  {
+		  private readonly CharArrayMap<V> outerInstance;
+
+		// slot this entry refers to
+		internal readonly int pos;
+		// whether setValue is permitted
+		internal readonly bool allowModify;
+
+		internal MapEntry(CharArrayMap<V> outerInstance, int pos, bool allowModify)
+		{
+			this.outerInstance = outerInstance;
+		  this.pos = pos;
+		  this.allowModify = allowModify;
+		}
+
+		public override object Key
+		{
+			get
+			{
+			  // we must clone here, as putAll to another CharArrayMap
+			  // with other case sensitivity flag would corrupt the keys
+			  // (.NET arrays expose Clone(), not Java's clone())
+			  return outerInstance.keys[pos].Clone();
+			}
+		}
+
+		public override V Value
+		{
+			get
+			{
+			  return outerInstance.values[pos];
+			}
+		}
+
+		/// <summary>
+		/// Replaces the value of this entry in the owning map. </summary>
+		/// <returns> the value that was stored before </returns>
+		/// <exception cref="System.NotSupportedException"> if this entry does not allow modification </exception>
+		public override V setValue(V value)
+		{
+		  if (!allowModify)
+		  {
+			throw new System.NotSupportedException();
+		  }
+		  V old = outerInstance.values[pos];
+		  outerInstance.values[pos] = value;
+		  return old;
+		}
+
+		public override string ToString()
+		{
+		  // Go through object: a V cannot be compared with the map by "==", and a
+		  // conditional mixing string and V has no common type.
+		  object value = outerInstance.values[pos];
+		  return (new StringBuilder()).Append(outerInstance.keys[pos]).Append('=').Append(object.ReferenceEquals(value, outerInstance) ? "(this Map)" : value).ToString();
+		}
+	  }
+
+	  /// <summary>
+	  /// public EntrySet class so efficient methods are exposed to users.
+	  /// A view onto the entries of the owning map. </summary>
+	  public sealed class EntrySet : AbstractSet<KeyValuePair<object, V>>
+	  {
+		  private readonly CharArrayMap<V> outerInstance;
+
+		// whether clear() and value changes through iterators are permitted
+		internal readonly bool allowModify;
+
+		internal EntrySet(CharArrayMap<V> outerInstance, bool allowModify)
+		{
+			this.outerInstance = outerInstance;
+		  this.allowModify = allowModify;
+		}
+
+		public override EntryIterator iterator()
+		{
+		  return new EntryIterator(outerInstance, allowModify);
+		}
+
+		/// <summary>
+		/// true if <paramref name="o"/> is an entry whose key is mapped to an equal value
+		/// in the owning map </summary>
+		public override bool contains(object o)
+		{
+		  // Test for the type that is actually cast to below. The previous check for
+		  // DictionaryEntry could never match a KeyValuePair, so every lookup returned
+		  // false (and a real DictionaryEntry would have failed the cast).
+		  if (!(o is KeyValuePair<object, V>))
+		  {
+			return false;
+		  }
+		  KeyValuePair<object, V> e = (KeyValuePair<object, V>)o;
+		  object key = e.Key;
+		  object val = e.Value;
+		  object v = outerInstance.get(key);
+		  return v == null ? val == null : v.Equals(val);
+		}
+
+		/// <summary>
+		/// Removal is not supported; always throws. </summary>
+		public override bool remove(object o)
+		{
+		  throw new System.NotSupportedException();
+		}
+
+		public override int size()
+		{
+		  return outerInstance.count;
+		}
+
+		/// <summary>
+		/// Clears the owning map. </summary>
+		/// <exception cref="System.NotSupportedException"> if this view does not allow modification </exception>
+		public override void clear()
+		{
+		  if (!allowModify)
+		  {
+			throw new System.NotSupportedException();
+		  }
+		  outerInstance.clear();
+		}
+	  }
+
+	  /// <summary>
+	  /// Returns an unmodifiable <seealso cref="CharArrayMap"/>. This allows to provide
+	  /// unmodifiable views of internal map for "read-only" use.
+	  /// </summary>
+	  /// <param name="map">
+	  ///          a map for which the unmodifiable map is returned. </param>
+	  /// <returns> an new unmodifiable <seealso cref="CharArrayMap"/>. </returns>
+	  /// <exception cref="NullPointerException">
+	  ///           if the given map is <code>null</code>. </exception>
+	  public static CharArrayMap<V> unmodifiableMap<V>(CharArrayMap<V> map)
+	  {
+		if (map == null)
+		{
+		  throw new System.NullReferenceException("Given map is null");
+		}
+		if (map == emptyMap() || map.Empty)
+		{
+		  return emptyMap();
+		}
+		// C# has no raw types and no diamond operator: the type argument is spelled out
+		if (map is UnmodifiableCharArrayMap<V>)
+		{
+		  // already read-only, no need to wrap again
+		  return map;
+		}
+		return new UnmodifiableCharArrayMap<V>(map);
+	  }
+
+	  /// <summary>
+	  /// Returns a copy of the given map as a <seealso cref="CharArrayMap"/>. If the given map
+	  /// is a <seealso cref="CharArrayMap"/> the ignoreCase property will be preserved.
+	  /// <para>
+	  /// <b>Note:</b> If you intend to create a copy of another <seealso cref="CharArrayMap"/> where
+	  /// the <seealso cref="Version"/> of the source map differs from its copy
+	  /// <seealso cref="#CharArrayMap(Version, Map, boolean)"/> should be used instead.
+	  /// The <seealso cref="#copy(Version, Map)"/> will preserve the <seealso cref="Version"/> of the
+	  /// source map it is an instance of <seealso cref="CharArrayMap"/>.
+	  /// </para>
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          compatibility match version see <a href="#version">Version
+	  ///          note</a> above for details. This argument will be ignored if the
+	  ///          given map is a <seealso cref="CharArrayMap"/>. </param>
+	  /// <param name="map">
+	  ///          a map to copy </param>
+	  /// <returns> a copy of the given map as a <seealso cref="CharArrayMap"/>. If the given map
+	  ///         is a <seealso cref="CharArrayMap"/> the ignoreCase property as well as the
+	  ///         matchVersion will be of the given map will be preserved. </returns>
+	  // The Java signature (annotation, "final", wildcard Map) could not be converted
+	  // automatically; it is expressed here with the key/value types this class uses.
+	  // NOTE(review): confirm IDictionary<object, V> suits the intended callers.
+	  public static CharArrayMap<V> copy(Version matchVersion, IDictionary<object, V> map)
+	  {
+		if (object.ReferenceEquals(map, EMPTY_MAP))
+		{
+		  return emptyMap();
+		}
+		CharArrayMap<V> m = map as CharArrayMap<V>;
+		if (m != null)
+		{
+		  // use fast path instead of iterating all values
+		  // this is even on very small sets ~10 times faster than iterating
+		  char[][] keys = new char[m.keys.Length][];
+		  Array.Copy(m.keys, 0, keys, 0, keys.Length);
+		  // allocate the generic array directly; casting an object[] to V[] is not valid in C#
+		  V[] values = new V[m.values.Length];
+		  Array.Copy(m.values, 0, values, 0, values.Length);
+		  // the private copy constructor shares the tables, so swap in the fresh copies
+		  m = new CharArrayMap<V>(m);
+		  m.keys = keys;
+		  m.values = values;
+		  return m;
+		}
+		return new CharArrayMap<V>(matchVersion, map, false);
+	  }
+
+	  /// <summary>
+	  /// Returns an empty, unmodifiable map. </summary>
+	  // A static member of a generic class is already specific to V, so the Java
+	  // method-level type parameter ("static <V> ...", not valid C#) is dropped.
+	  public static CharArrayMap<V> emptyMap()
+	  {
+		return (CharArrayMap<V>) EMPTY_MAP;
+	  }
+
+	  // package private CharArraySet instanceof check in CharArraySet
+	  /// <summary>
+	  /// Read-only variant of <seealso cref="CharArrayMap"/>: shares the state of the wrapped
+	  /// map and rejects every modifying operation with
+	  /// <see cref="System.NotSupportedException"/>. </summary>
+	  // A Java "static" nested class is a plain nested class in C# (a C# static class could
+	  // neither derive from CharArrayMap nor be instantiated).
+	  internal class UnmodifiableCharArrayMap<V> : CharArrayMap<V>
+	  {
+
+		// the base constructor call belongs in the initializer list, not in the body
+		internal UnmodifiableCharArrayMap(CharArrayMap<V> map) : base(map)
+		{
+		}
+
+		public override void clear()
+		{
+		  throw new System.NotSupportedException();
+		}
+
+		public override V put(object o, V val)
+		{
+		  throw new System.NotSupportedException();
+		}
+
+		public override V put(char[] text, V val)
+		{
+		  throw new System.NotSupportedException();
+		}
+
+		public override V put(CharSequence text, V val)
+		{
+		  throw new System.NotSupportedException();
+		}
+
+		public override V put(string text, V val)
+		{
+		  throw new System.NotSupportedException();
+		}
+
+		public override V remove(object key)
+		{
+		  throw new System.NotSupportedException();
+		}
+
+		// entry set view that does not allow modification
+		internal override EntrySet createEntrySet()
+		{
+		  return new EntrySet(this, false);
+		}
+	  }
+
+	  /// <summary>
+	  /// Empty <seealso cref="org.apache.lucene.analysis.util.CharArrayMap.UnmodifiableCharArrayMap"/> optimized for speed.
+	  /// Contains checks will always return <code>false</code> or throw
+	  /// NPE if necessary.
+	  /// </summary>
+	  private sealed class EmptyCharArrayMap<V> : UnmodifiableCharArrayMap<V>
+	  {
+		// the base constructor call belongs in the initializer list, not in the body
+		internal EmptyCharArrayMap() : base(new CharArrayMap<V>(Version.LUCENE_CURRENT, 0, false))
+		{
+		}
+
+		public override bool containsKey(char[] text, int off, int len)
+		{
+		  if (text == null)
+		  {
+			throw new System.NullReferenceException();
+		  }
+		  return false;
+		}
+
+		public override bool containsKey(CharSequence cs)
+		{
+		  if (cs == null)
+		  {
+			throw new System.NullReferenceException();
+		  }
+		  return false;
+		}
+
+		public override bool containsKey(object o)
+		{
+		  if (o == null)
+		  {
+			throw new System.NullReferenceException();
+		  }
+		  return false;
+		}
+
+		// The lookups below return default(V): "null" is not a valid value for an
+		// unconstrained type parameter.
+		public override V get(char[] text, int off, int len)
+		{
+		  if (text == null)
+		  {
+			throw new System.NullReferenceException();
+		  }
+		  return default(V);
+		}
+
+		public override V get(CharSequence cs)
+		{
+		  if (cs == null)
+		  {
+			throw new System.NullReferenceException();
+		  }
+		  return default(V);
+		}
+
+		public override V get(object o)
+		{
+		  if (o == null)
+		  {
+			throw new System.NullReferenceException();
+		  }
+		  return default(V);
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
new file mode 100644
index 0000000..d9253d7
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
@@ -0,0 +1,267 @@
+using System.Collections.Generic;
+using System.Text;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using Version = org.apache.lucene.util.Version;
+
+
+	/// <summary>
+	/// A simple class that stores Strings as char[]'s in a
+	/// hash table.  Note that this is not a general purpose
+	/// class.  For example, it cannot remove items from the
+	/// set, nor does it resize its hash table to be smaller,
+	/// etc.  It is designed to be quick to test if a char[]
+	/// is in the set without the necessity of converting it
+	/// to a String first.
+	/// 
+	/// <a name="version"></a>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating <seealso cref="CharArraySet"/>:
+	/// <ul>
+	///   <li> As of 3.1, supplementary characters are
+	///       properly lowercased.</li>
+	/// </ul>
+	/// Before 3.1 supplementary characters could not be
+	/// lowercased correctly due to the lack of Unicode 4
+	/// support in JDK 1.4. To use instances of
+	/// <seealso cref="CharArraySet"/> with the behavior before Lucene
+	/// 3.1 pass a <seealso cref="Version"/> &lt; 3.1 to the constructors.
+	/// <P>
+	/// <em>Please note:</em> This class implements <seealso cref="java.util.Set Set"/> but
+	/// does not behave like it should in all cases. The generic type is
+	/// {@code Set<Object>}, because you can add any object to it,
+	/// that has a string representation. The add methods will use
+	/// <seealso cref="Object#toString"/> and store the result using a {@code char[]}
+	/// buffer. The {@code contains()} methods behave the same way.
+	/// The <seealso cref="#iterator()"/> returns an {@code Iterator<char[]>}.
+	/// </para>
+	/// </summary>
+	public class CharArraySet : AbstractSet<object>
+	{
+	  public static readonly CharArraySet EMPTY_SET = new CharArraySet(CharArrayMap.emptyMap<object>());
+	  private static readonly object PLACEHOLDER = new object();
+
+	  // Backing map; every element added to this set is stored as a key mapped to PLACEHOLDER.
+	  private readonly CharArrayMap<object> map;
+
+	  /// <summary>
+	  /// Create set with enough capacity to hold startSize terms
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          compatibility match version see <a href="#version">Version
+	  ///          note</a> above for details. </param>
+	  /// <param name="startSize">
+	  ///          the initial capacity </param>
+	  /// <param name="ignoreCase">
+	  ///          <code>false</code> if and only if the set should be case sensitive
+	  ///          otherwise <code>true</code>. </param>
+	  public CharArraySet(Version matchVersion, int startSize, bool ignoreCase) : this(new CharArrayMap<object>(matchVersion, startSize, ignoreCase))
+	  {
+		// The type argument is spelled out above: C# has no equivalent of Java's diamond operator.
+	  }
+
+	  /// <summary>
+	  /// Creates a set from a collection of objects.
+	  /// <para>
+	  /// C# constructors cannot declare type parameters, so the collection is accepted
+	  /// through the non-generic <code>IEnumerable</code>, which every generic collection
+	  /// also implements.
+	  /// </para>
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          compatibility match version see <a href="#version">Version
+	  ///          note</a> above for details. </param>
+	  /// <param name="c">
+	  ///          a collection whose elements to be placed into the set </param>
+	  /// <param name="ignoreCase">
+	  ///          <code>false</code> if and only if the set should be case sensitive
+	  ///          otherwise <code>true</code>. </param>
+	  public CharArraySet(Version matchVersion, System.Collections.IEnumerable c, bool ignoreCase) : this(matchVersion, countOf(c), ignoreCase)
+	  {
+		// Each element goes through add(object), one at a time.
+		foreach (object element in c)
+		{
+		  add(element);
+		}
+	  }
+
+	  /// <summary>
+	  /// Create set from the specified map (internal only), used also by the key set of CharArrayMap. </summary>
+	  internal CharArraySet(CharArrayMap<object> map)
+	  {
+		this.map = map;
+	  }
+
+	  // Number of elements in the sequence, used as the initial capacity.
+	  // Uses ICollection.Count when available and otherwise enumerates once.
+	  // A null sequence fails in the foreach with NullReferenceException.
+	  private static int countOf(System.Collections.IEnumerable c)
+	  {
+		System.Collections.ICollection sized = c as System.Collections.ICollection;
+		if (sized != null)
+		{
+		  return sized.Count;
+		}
+		int count = 0;
+		foreach (object ignored in c)
+		{
+		  count++;
+		}
+		return count;
+	  }
+
+	  /// <summary>
+	  /// Clears all entries in this set. This method is supported for reusing, but removing single entries is not. </summary>
+	  public override void clear()
+	  {
+		map.clear();
+	  }
+
+	  /// <summary>
+	  /// true if the <code>len</code> chars of <code>text</code> starting at <code>off</code>
+	  /// are in the set 
+	  /// </summary>
+	  public virtual bool contains(char[] text, int off, int len)
+	  {
+		return map.containsKey(text, off, len);
+	  }
+
+	  /// <summary>
+	  /// true if the <code>CharSequence</code> is in the set </summary>
+	  public virtual bool contains(CharSequence cs)
+	  {
+		return map.containsKey(cs);
+	  }
+
+	  /// <summary>
+	  /// true if the backing map contains the given object as a key </summary>
+	  public override bool contains(object o)
+	  {
+		return map.containsKey(o);
+	  }
+
+	  /// <summary>
+	  /// Add this object into the set; returns true when the backing map held no previous value for it </summary>
+	  public override bool add(object o)
+	  {
+		return map.put(o, PLACEHOLDER) == null;
+	  }
+
+	  /// <summary>
+	  /// Add this CharSequence into the set </summary>
+	  public virtual bool add(CharSequence text)
+	  {
+		return map.put(text, PLACEHOLDER) == null;
+	  }
+
+	  /// <summary>
+	  /// Add this String into the set </summary>
+	  public virtual bool add(string text)
+	  {
+		return map.put(text, PLACEHOLDER) == null;
+	  }
+
+	  /// <summary>
+	  /// Add this char[] directly to the set.
+	  /// If ignoreCase is true for this Set, the text array will be directly modified.
+	  /// The user should never modify this text array after calling this method.
+	  /// </summary>
+	  public virtual bool add(char[] text)
+	  {
+		return map.put(text, PLACEHOLDER) == null;
+	  }
+
+	  /// <summary>
+	  /// Number of entries, as reported by the backing map </summary>
+	  public override int size()
+	  {
+		return map.size();
+	  }
+
+	  /// <summary>
+	  /// Returns an unmodifiable <seealso cref="CharArraySet"/>. This allows to provide
+	  /// unmodifiable views of internal sets for "read-only" use.
+	  /// </summary>
+	  /// <param name="set">
+	  ///          a set for which the unmodifiable set is returned. </param>
+	  /// <returns> a new unmodifiable <seealso cref="CharArraySet"/>. </returns>
+	  /// <exception cref="System.NullReferenceException">
+	  ///           if the given set is <code>null</code>. </exception>
+	  public static CharArraySet unmodifiableSet(CharArraySet set)
+	  {
+		if (set == null)
+		{
+		  throw new System.NullReferenceException("Given set is null");
+		}
+		if (set == EMPTY_SET)
+		{
+		  return EMPTY_SET;
+		}
+		// Already backed by an unmodifiable map: no need to wrap again.
+		if (set.map is CharArrayMap.UnmodifiableCharArrayMap)
+		{
+		  return set;
+		}
+		return new CharArraySet(CharArrayMap.unmodifiableMap(set.map));
+	  }
+
+	  /// <summary>
+	  /// Returns a copy of the given set as a <seealso cref="CharArraySet"/>. If the given set
+	  /// is a <seealso cref="CharArraySet"/> the ignoreCase property will be preserved.
+	  /// <para>
+	  /// <b>Note:</b> If you intend to create a copy of another <seealso cref="CharArraySet"/> where
+	  /// the <seealso cref="Version"/> of the source set differs from its copy, the
+	  /// constructor taking a version, a collection and the ignoreCase flag should be used instead.
+	  /// This method will preserve the <seealso cref="Version"/> of the
+	  /// source set if it is an instance of <seealso cref="CharArraySet"/>.
+	  /// </para>
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          compatibility match version see <a href="#version">Version
+	  ///          note</a> above for details. This argument will be ignored if the
+	  ///          given set is a <seealso cref="CharArraySet"/>. </param>
+	  /// <param name="set">
+	  ///          a set to copy </param>
+	  /// <returns> a copy of the given set as a <seealso cref="CharArraySet"/>. If the given set
+	  ///         is a <seealso cref="CharArraySet"/> the ignoreCase property as well as the
+	  ///         matchVersion of the given set will be preserved. </returns>
+	  public static CharArraySet copy<T1>(Version matchVersion, HashSet<T1> set)
+	  {
+		// Identity test and type test are done through 'object': a direct '==' or cast
+		// between HashSet<T1> and CharArraySet is rejected by the compiler.
+		// NOTE(review): whether a HashSet<T1> argument can ever be a CharArraySet depends
+		// on AbstractSet<object>, which is not visible here - confirm the parameter type.
+		object candidate = set;
+		if (object.ReferenceEquals(candidate, EMPTY_SET))
+		{
+		  return EMPTY_SET;
+		}
+		CharArraySet source = candidate as CharArraySet;
+		if (source != null)
+		{
+		  return new CharArraySet(CharArrayMap.copy(source.map.matchVersion, source.map));
+		}
+		return new CharArraySet(matchVersion, set, false);
+	  }
+
+	  /// <summary>
+	  /// Returns an enumerator over the {@code char[]} instances in this set.
+	  /// </summary>
+	  public override IEnumerator<object> iterator()
+	  {
+		// use the AbstractSet#keySet()'s iterator (to not produce endless recursion)
+		return map.originalKeySet().GetEnumerator();
+	  }
+
+	  /// <summary>
+	  /// Renders the elements as a bracketed, comma separated list; char[] elements are appended as text.
+	  /// </summary>
+	  public override string ToString()
+	  {
+		StringBuilder sb = new StringBuilder("[");
+		foreach (object item in this)
+		{
+		  // Length is 1 only while nothing but the opening bracket has been written.
+		  if (sb.Length > 1)
+		  {
+			sb.Append(", ");
+		  }
+		  char[] chars = item as char[];
+		  if (chars != null)
+		  {
+			sb.Append(chars);
+		  }
+		  else
+		  {
+			sb.Append(item);
+		  }
+		}
+		return sb.Append(']').ToString();
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/CharFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharFilterFactory.cs
new file mode 100644
index 0000000..e2f5b0a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharFilterFactory.cs
@@ -0,0 +1,86 @@
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Util;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	/// <summary>
+	/// Abstract parent class for analysis factories that create <seealso cref="CharFilter"/>
+	/// instances.
+	/// </summary>
+	public abstract class CharFilterFactory : AbstractAnalysisFactory
+	{
+
+	  // Single shared loader that every static lookup below delegates to.
+	  private static readonly AnalysisSPILoader<CharFilterFactory> spiLoader = new AnalysisSPILoader<CharFilterFactory>(typeof(CharFilterFactory));
+
+	  /// <summary>
+	  /// Asks the shared loader for a new charfilter factory registered under <code>name</code>,
+	  /// passing <code>args</code> through to it. </summary>
+	  public static CharFilterFactory forName(string name, IDictionary<string, string> args)
+	  {
+		CharFilterFactory factory = spiLoader.newInstance(name, args);
+		return factory;
+	  }
+
+	  /// <summary>
+	  /// Asks the shared loader for the charfilter factory type registered under <code>name</code>. </summary>
+	  public static Type lookupClass(string name)
+	  {
+		Type factoryType = spiLoader.lookupClass(name);
+		return factoryType;
+	  }
+
+	  /// <summary>
+	  /// Returns the names of all charfilter services the shared loader reports as available. </summary>
+	  public static HashSet<string> availableCharFilters()
+	  {
+		HashSet<string> names = spiLoader.availableServices();
+		return names;
+	  }
+
+	  /// <summary>
+	  /// Reloads the factory list from the given <seealso cref="ClassLoader"/>.
+	  /// Changes to the factories are visible after the method ends, all
+	  /// iterators (e.g. the result of availableCharFilters()) stay consistent.
+	  /// 
+	  /// <para><b>NOTE:</b> Only new factories are added, existing ones are
+	  /// never removed or replaced.
+	  /// 
+	  /// </para>
+	  /// <para><em>This method is expensive and should only be called for discovery
+	  /// of new factories on the given classpath/classloader!</em>
+	  /// </para>
+	  /// </summary>
+	  public static void reloadCharFilters(ClassLoader classloader)
+	  {
+		spiLoader.reload(classloader);
+	  }
+
+	  /// <summary>
+	  /// Initializes this factory from a set of key-value pairs, which are handed to the base class.
+	  /// </summary>
+	  protected internal CharFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Wraps the given Reader with a CharFilter. </summary>
+	  public abstract Reader create(Reader input);
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs
new file mode 100644
index 0000000..1cd6395
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs
@@ -0,0 +1,209 @@
+using System.Diagnostics;
+using System.IO;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.util;
+
+namespace Lucene.Net.Analysis.Util
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// An abstract base class for simple, character-oriented tokenizers. 
+    /// <para>
+    /// <a name="version">You must specify the required <seealso cref="Version"/> compatibility
+    /// when creating <seealso cref="CharTokenizer"/>:
+    /// <ul>
+    /// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and
+    /// detect token codepoints. See <seealso cref="#isTokenChar(int)"/> and
+    /// <seealso cref="#normalize(int)"/> for details.</li>
+    /// </ul>
+    /// </para>
+    /// <para>
+    /// A new <seealso cref="CharTokenizer"/> API has been introduced with Lucene 3.1. This API
+    /// moved from UTF-16 code units to UTF-32 codepoints to eventually add support
+    /// for <a href=
+    /// "http://java.sun.com/j2se/1.5.0/docs/api/java/lang/Character.html#supplementary"
+    /// >supplementary characters</a>. The old <i>char</i> based API has been
+    /// deprecated and should be replaced with the <i>int</i> based methods
+    /// <seealso cref="#isTokenChar(int)"/> and <seealso cref="#normalize(int)"/>.
+    /// </para>
+    /// <para>
+    /// As of Lucene 3.1 each <seealso cref="CharTokenizer"/> - constructor expects a
+    /// <seealso cref="Version"/> argument. Based on the given <seealso cref="Version"/> either the new
+    /// API or a backwards compatibility layer is used at runtime. For
+    /// <seealso cref="Version"/> &lt; 3.1 the backwards compatibility layer ensures correct
+    /// behavior even for indexes built with previous versions of Lucene. If a
+    /// <seealso cref="Version"/> &gt;= 3.1 is used <seealso cref="CharTokenizer"/> requires the new API to
+    /// be implemented by the instantiated class. Yet, the old <i>char</i> based API
+    /// is not required anymore even if backwards compatibility must be preserved.
+    /// <seealso cref="CharTokenizer"/> subclasses implementing the new API are fully backwards
+    /// compatible if instantiated with <seealso cref="Version"/> &lt; 3.1.
+    /// </para>
+    /// <para>
+    /// <strong>Note:</strong> If you use a subclass of <seealso cref="CharTokenizer"/> with <seealso cref="Version"/> &gt;=
+    /// 3.1 on an index built with a version &lt; 3.1, created tokens might not be
+    /// compatible with the terms in your index.
+    /// </para>
+    /// 
+    /// </summary>
+    public abstract class CharTokenizer : Tokenizer
+    {
+        private const int MAX_WORD_LEN = 255;
+        private const int IO_BUFFER_SIZE = 4096;
+
+        // Read position state: 'offset' is the input offset of the start of the current
+        // I/O buffer, 'bufferIndex' the next position inside it, 'dataLen' its fill length.
+        private int offset = 0, bufferIndex = 0, dataLen = 0, finalOffset = 0;
+
+        // Assigned in the constructors: a field initializer may not call an instance
+        // method such as addAttribute (compiler error CS0236).
+        private readonly CharTermAttribute termAtt;
+        private readonly OffsetAttribute offsetAtt;
+
+        private readonly CharacterUtils charUtils;
+        private readonly CharacterUtils.CharacterBuffer ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE);
+
+        /// <summary>
+        /// Creates a new <seealso cref="CharTokenizer"/> instance
+        /// </summary>
+        /// <param name="matchVersion">
+        ///          Lucene version to match </param>
+        /// <param name="input">
+        ///          the input to split up into tokens </param>
+        public CharTokenizer(Version matchVersion, TextReader input)
+            : base(input)
+        {
+            charUtils = CharacterUtils.getInstance(matchVersion);
+            termAtt = addAttribute(typeof(CharTermAttribute));
+            offsetAtt = addAttribute(typeof(OffsetAttribute));
+        }
+
+        /// <summary>
+        /// Creates a new <seealso cref="CharTokenizer"/> instance
+        /// </summary>
+        /// <param name="matchVersion">
+        ///          Lucene version to match </param>
+        /// <param name="factory">
+        ///          the attribute factory to use for this <seealso cref="Tokenizer"/> </param>
+        /// <param name="input">
+        ///          the input to split up into tokens </param>
+        public CharTokenizer(Version matchVersion, AttributeFactory factory, TextReader input)
+            : base(factory, input)
+        {
+            // No private copy of the reader is kept: the previous '_input' field was set
+            // by this constructor only and never read; IncrementToken reads 'input'.
+            charUtils = CharacterUtils.getInstance(matchVersion);
+            termAtt = addAttribute(typeof(CharTermAttribute));
+            offsetAtt = addAttribute(typeof(OffsetAttribute));
+        }
+
+        /// <summary>
+        /// Returns true iff a codepoint should be included in a token. This tokenizer
+        /// generates as tokens adjacent sequences of codepoints which satisfy this
+        /// predicate. Codepoints for which this is false are used to define token
+        /// boundaries and are not included in tokens.
+        /// </summary>
+        protected internal abstract bool IsTokenChar(char c);
+
+        /// <summary>
+        /// Called on each token character to normalize it before it is added to the
+        /// token. The default implementation does nothing. Subclasses may use this to,
+        /// e.g., lowercase tokens.
+        /// </summary>
+        protected virtual int Normalize(int c)
+        {
+            return c;
+        }
+
+        /// <summary>
+        /// Reads the next run of adjacent token characters into the term attribute and
+        /// records its corrected start/end offsets.
+        /// </summary>
+        /// <returns> <code>true</code> if a token was produced, <code>false</code> once the
+        ///          input is exhausted and no characters are pending. </returns>
+        public override bool IncrementToken()
+        {
+            ClearAttributes();
+            int length = 0;
+            int start = -1; // this variable is always initialized
+            int end_Renamed = -1;
+            char[] buffer = termAtt.Buffer();
+            while (true)
+            {
+                if (bufferIndex >= dataLen)
+                {
+                    // Current I/O buffer is consumed: account for it and refill.
+                    offset += dataLen;
+                    charUtils.fill(ioBuffer, input); // read supplementary char aware with CharacterUtils
+                    if (ioBuffer.Length == 0)
+                    {
+                        dataLen = 0; // so next offset += dataLen won't decrement offset
+                        if (length > 0)
+                        {
+                            break;
+                        }
+                        else
+                        {
+                            finalOffset = CorrectOffset(offset);
+                            return false;
+                        }
+                    }
+                    dataLen = ioBuffer.Length;
+                    bufferIndex = 0;
+                }
+                // use CharacterUtils here to support < 3.1 UTF-16 code unit behavior if the char based methods are gone
+                int c = charUtils.codePointAt(ioBuffer.Buffer, bufferIndex, ioBuffer.Length);
+                int charCount = Character.CharCount(c);
+                bufferIndex += charCount;
+
+                // NOTE(review): IsTokenChar is declared with a char parameter while c is a
+                // code point held in an int; this cast truncates supplementary code points.
+                // Confirm whether the abstract method should take an int instead.
+                if (IsTokenChar((char)c)) // if it's a token char
+                {
+                    if (length == 0) // start of token
+                    {
+                        Debug.Assert(start == -1);
+                        start = offset + bufferIndex - charCount;
+                        end_Renamed = start;
+                    } // check if a supplementary could run out of bounds
+                    else if (length >= buffer.Length - 1)
+                    {
+                        buffer = termAtt.ResizeBuffer(2 + length); // make sure a supplementary fits in the buffer
+                    }
+                    end_Renamed += charCount;
+                    length += Character.ToChars(Normalize(c), buffer, length); // buffer it, normalized
+                    if (length >= MAX_WORD_LEN) // buffer overflow! make sure to check for >= surrogate pair could break == test
+                    {
+                        break;
+                    }
+                } // at non-Letter w/ chars
+                else if (length > 0)
+                {
+                    break; // return 'em
+                }
+            }
+
+            termAtt.Length = length;
+            Debug.Assert(start != -1);
+            offsetAtt.SetOffset(CorrectOffset(start), finalOffset = CorrectOffset(end_Renamed));
+            return true;
+        }
+
+        /// <summary>
+        /// Runs the base end-of-stream handling, then sets both offsets to the final offset.
+        /// </summary>
+        public override void End()
+        {
+            base.End();
+            // set final offset
+            offsetAtt.SetOffset(finalOffset, finalOffset);
+        }
+
+        /// <summary>
+        /// Runs the base reset, then clears all read-position state and the I/O buffer
+        /// so the tokenizer can be reused.
+        /// </summary>
+        public override void Reset()
+        {
+            base.Reset();
+            bufferIndex = 0;
+            offset = 0;
+            dataLen = 0;
+            finalOffset = 0;
+            ioBuffer.reset(); // make sure to reset the IO buffer!!
+        }
+    }
+}
\ No newline at end of file


Mime
View raw message