lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [02/27] lucenenet git commit: adding converted analysis common tests
Date Thu, 10 Dec 2015 18:38:51 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArraySet.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArraySet.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArraySet.cs
new file mode 100644
index 0000000..783c4ce
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArraySet.cs
@@ -0,0 +1,593 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using LuceneTestCase = org.apache.lucene.util.LuceneTestCase;
+	using Version = org.apache.lucene.util.Version;
+
+
+	public class TestCharArraySet : LuceneTestCase
+	{
+
+	  // Lower-case stop words used as shared fixture data by the tests in this class.
+	  internal static readonly string[] TEST_STOP_WORDS = new string[] {"a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"};
+
+
+	  /// <summary>
+	  /// Adds every stop word to a set constructed with 0 as its second argument
+	  /// and verifies that the size matches and that each word can be found.
+	  /// </summary>
+	  public virtual void testRehash()
+	  {
+		CharArraySet cas = new CharArraySet(TEST_VERSION_CURRENT, 0, true);
+		foreach (string word in TEST_STOP_WORDS)
+		{
+		  cas.add(word);
+		}
+
+		assertEquals(TEST_STOP_WORDS.Length, cas.size());
+
+		foreach (string word in TEST_STOP_WORDS)
+		{
+		  assertTrue(cas.contains(word));
+		}
+	  }
+
+	  /// <summary>
+	  /// Looks up "this" through the window (offset 1, length 4) of the char
+	  /// buffer "xthisy", first on a modifiable set and then on the result of
+	  /// <c>CharArraySet.unmodifiableSet</c>.
+	  /// </summary>
+	  public virtual void testNonZeroOffset()
+	  {
+		char[] buffer = "xthisy".ToCharArray();
+		string[] entries = new string[] {"Hello","World","this","is","a","test"};
+
+		CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
+		set.addAll(entries);
+		bool foundInBuffer = set.contains(buffer, 1, 4);
+		assertTrue(foundInBuffer);
+		bool foundAsString = set.contains(new string(buffer,1,4));
+		assertTrue(foundAsString);
+
+		// same lookups against the unmodifiable wrapper
+		set = CharArraySet.unmodifiableSet(set);
+		foundInBuffer = set.contains(buffer, 1, 4);
+		assertTrue(foundInBuffer);
+		foundAsString = set.contains(new string(buffer,1,4));
+		assertTrue(foundAsString);
+	  }
+
+	  /// <summary>
+	  /// Adds the integer 1 and checks that it is found via the same value,
+	  /// another integer 1, the string "1" and the char array {'1'} - first on
+	  /// the modifiable set, then on the result of
+	  /// <c>CharArraySet.unmodifiableSet</c>.
+	  /// </summary>
+	  public virtual void testObjectContains()
+	  {
+		int? one = Convert.ToInt32(1);
+		CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
+		set.add(one);
+
+		// modifiable set
+		assertTrue(set.contains(one));
+		assertTrue(set.contains(new int?(1))); // a second, equal integer
+		assertTrue(set.contains("1"));
+		assertTrue(set.contains(new char[]{'1'}));
+
+		// unmodifiable wrapper
+		set = CharArraySet.unmodifiableSet(set);
+		assertTrue(set.contains(one));
+		assertTrue(set.contains(new int?(1))); // a second, equal integer
+		assertTrue(set.contains("1"));
+		assertTrue(set.contains(new char[]{'1'}));
+	  }
+
+	  /// <summary>
+	  /// Fills the set, clears it and checks that it is empty, then refills it
+	  /// and checks that every stop word is present again.
+	  /// </summary>
+	  public virtual void testClear()
+	  {
+		CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10,true);
+		set.addAll(TEST_STOP_WORDS);
+		assertEquals("Not all words added", TEST_STOP_WORDS.Length, set.size());
+
+		set.clear();
+		assertEquals("not empty", 0, set.size());
+		foreach (string removed in TEST_STOP_WORDS)
+		{
+		  assertFalse(set.contains(removed));
+		}
+
+		set.addAll(TEST_STOP_WORDS);
+		assertEquals("Not all words added", TEST_STOP_WORDS.Length, set.size());
+		foreach (string restored in TEST_STOP_WORDS)
+		{
+		  assertTrue(set.contains(restored));
+		}
+	  }
+
+	  /// <summary>
+	  /// Verifies that every mutating operation on the result of
+	  /// <c>CharArraySet.unmodifiableSet</c> throws
+	  /// <see cref="System.NotSupportedException"/> and leaves the set's size and
+	  /// contents untouched.
+	  /// </summary>
+	  public virtual void testModifyOnUnmodifiable()
+	  {
+		CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
+		set.addAll(TEST_STOP_WORDS);
+		int size = set.size();
+		set = CharArraySet.unmodifiableSet(set);
+		assertEquals("Set size changed due to unmodifiableSet call", size, set.size());
+		string NOT_IN_SET = "SirGallahad";
+		assertFalse("Test String already exists in set", set.contains(NOT_IN_SET));
+
+		// add(char[])
+		try
+		{
+		  set.add(NOT_IN_SET.ToCharArray());
+		  fail("Modified unmodifiable set");
+		}
+		catch (System.NotSupportedException)
+		{
+		  // expected
+		  assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
+		  assertEquals("Size of unmodifiable set has changed", size, set.size());
+		}
+
+		// add(string)
+		try
+		{
+		  set.add(NOT_IN_SET);
+		  fail("Modified unmodifiable set");
+		}
+		catch (System.NotSupportedException)
+		{
+		  // expected
+		  assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
+		  assertEquals("Size of unmodifiable set has changed", size, set.size());
+		}
+
+		// add(StringBuilder)
+		try
+		{
+		  set.add(new StringBuilder(NOT_IN_SET));
+		  fail("Modified unmodifiable set");
+		}
+		catch (System.NotSupportedException)
+		{
+		  // expected
+		  assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
+		  assertEquals("Size of unmodifiable set has changed", size, set.size());
+		}
+
+		// clear()
+		try
+		{
+		  set.clear();
+		  fail("Modified unmodifiable set");
+		}
+		catch (System.NotSupportedException)
+		{
+		  // expected
+		  assertFalse("Changed unmodifiable set", set.contains(NOT_IN_SET));
+		  assertEquals("Size of unmodifiable set has changed", size, set.size());
+		}
+
+		// add(object)
+		try
+		{
+		  set.add((object) NOT_IN_SET);
+		  fail("Modified unmodifiable set");
+		}
+		catch (System.NotSupportedException)
+		{
+		  // expected
+		  assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
+		  assertEquals("Size of unmodifiable set has changed", size, set.size());
+		}
+
+		// This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
+		// current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefore never call
+		// remove() on the iterator
+		try
+		{
+		  set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true));
+		  fail("Modified unmodifiable set");
+		}
+		catch (System.NotSupportedException)
+		{
+		  // expected
+		  assertEquals("Size of unmodifiable set has changed", size, set.size());
+		}
+
+		// retainAll(): the argument must be a one-element collection holding the
+		// word. Passing the bare string (as the converted code did) would hand the
+		// constructor a sequence of chars instead of a collection of terms.
+		try
+		{
+		  set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, new string[] {NOT_IN_SET}, true));
+		  fail("Modified unmodifiable set");
+		}
+		catch (System.NotSupportedException)
+		{
+		  // expected
+		  assertEquals("Size of unmodifiable set has changed", size, set.size());
+		}
+
+		// addAll(): same one-element collection, mirroring addAll(TEST_STOP_WORDS) above.
+		try
+		{
+		  set.addAll(new string[] {NOT_IN_SET});
+		  fail("Modified unmodifiable set");
+		}
+		catch (System.NotSupportedException)
+		{
+		  // expected
+		  assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
+		}
+
+		// After all failed modifications the original content must still be there.
+		for (int i = 0; i < TEST_STOP_WORDS.Length; i++)
+		{
+		  assertTrue(set.contains(TEST_STOP_WORDS[i]));
+		}
+	  }
+
+	  /// <summary>
+	  /// Checks that wrapping a set with <c>CharArraySet.unmodifiableSet</c>
+	  /// keeps its size and contents, and that wrapping <c>null</c> throws.
+	  /// </summary>
+	  public virtual void testUnmodifiableSet()
+	  {
+		CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10,true);
+		set.addAll(TEST_STOP_WORDS);
+		set.add(Convert.ToInt32(1));
+		int expectedSize = set.size();
+
+		set = CharArraySet.unmodifiableSet(set);
+		assertEquals("Set size changed due to unmodifiableSet call", expectedSize, set.size());
+		for (int i = 0; i < TEST_STOP_WORDS.Length; i++)
+		{
+		  assertTrue(set.contains(TEST_STOP_WORDS[i]));
+		}
+		// the integer entry is reachable as a number, a string and a char[]
+		assertTrue(set.contains(Convert.ToInt32(1)));
+		assertTrue(set.contains("1"));
+		assertTrue(set.contains(new char[]{'1'}));
+
+		try
+		{
+		  CharArraySet.unmodifiableSet(null);
+		  fail("can not make null unmodifiable");
+		}
+		catch (System.NullReferenceException)
+		{
+		  // expected
+		}
+	  }
+
+	  /// <summary>
+	  /// Checks lookups of terms containing supplementary characters (surrogate
+	  /// pairs): a set built with <c>true</c> as its last constructor argument
+	  /// must also match the lower-case variants, a set built with <c>false</c>
+	  /// must not.
+	  /// </summary>
+	  public virtual void testSupplementaryChars()
+	  {
+		// Composite format strings: string.Format understands {0}, not Java's %s.
+		string missing = "Term {0} is missing in the set";
+		string falsePos = "Term {0} is in the set but shouldn't";
+		// The invariant culture stands in for Java's Locale.ROOT.
+		IFormatProvider root = System.Globalization.CultureInfo.InvariantCulture;
+		// for reference see
+		// http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
+		string[] upperArr = new string[] {"Abc\ud801\udc1c", "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"};
+		string[] lowerArr = new string[] {"abc\ud801\udc44", "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"};
+		CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true);
+		foreach (string upper in upperArr)
+		{
+		  set.add(upper);
+		}
+		for (int i = 0; i < upperArr.Length; i++)
+		{
+		  assertTrue(string.Format(root, missing, upperArr[i]), set.contains(upperArr[i]));
+		  assertTrue(string.Format(root, missing, lowerArr[i]), set.contains(lowerArr[i]));
+		}
+		set = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, false);
+		foreach (string upper in upperArr)
+		{
+		  set.add(upper);
+		}
+		for (int i = 0; i < upperArr.Length; i++)
+		{
+		  assertTrue(string.Format(root, missing, upperArr[i]), set.contains(upperArr[i]));
+		  assertFalse(string.Format(root, falsePos, lowerArr[i]), set.contains(lowerArr[i]));
+		}
+	  }
+
+	  /// <summary>
+	  /// Same checks as testSupplementaryChars, but with terms that contain an
+	  /// unpaired high surrogate (\uD800).
+	  /// </summary>
+	  public virtual void testSingleHighSurrogate()
+	  {
+		// Composite format strings: string.Format understands {0}, not Java's %s.
+		string missing = "Term {0} is missing in the set";
+		string falsePos = "Term {0} is in the set but shouldn't";
+		// The invariant culture stands in for Java's Locale.ROOT.
+		IFormatProvider root = System.Globalization.CultureInfo.InvariantCulture;
+		string[] upperArr = new string[] {"ABC\uD800", "ABC\uD800EfG", "\uD800EfG", "\uD800\ud801\udc1cB"};
+
+		string[] lowerArr = new string[] {"abc\uD800", "abc\uD800efg", "\uD800efg", "\uD800\ud801\udc44b"};
+		CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true);
+		foreach (string upper in upperArr)
+		{
+		  set.add(upper);
+		}
+		for (int i = 0; i < upperArr.Length; i++)
+		{
+		  assertTrue(string.Format(root, missing, upperArr[i]), set.contains(upperArr[i]));
+		  assertTrue(string.Format(root, missing, lowerArr[i]), set.contains(lowerArr[i]));
+		}
+		set = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, false);
+		foreach (string upper in upperArr)
+		{
+		  set.add(upper);
+		}
+		for (int i = 0; i < upperArr.Length; i++)
+		{
+		  assertTrue(string.Format(root, missing, upperArr[i]), set.contains(upperArr[i]));
+		  // The failure message names the term that is actually looked up (lowerArr[i]).
+		  assertFalse(string.Format(root, falsePos, lowerArr[i]), set.contains(lowerArr[i]));
+		}
+	  }
+
+	  /// <summary>
+	  /// Backwards-compatibility variant of testSupplementaryChars using
+	  /// <c>Version.LUCENE_30</c>: here the lower-case variants of terms with
+	  /// supplementary characters are expected NOT to match, whatever the last
+	  /// constructor argument is.
+	  /// Deprecated (3.1): remove this test when lucene 3.0 "broken unicode 4"
+	  /// support is no longer needed.
+	  /// </summary>
+	  [Obsolete("(3.1) remove this test when lucene 3.0 \"broken unicode 4\" support is no longer needed.")]
+	  public virtual void testSupplementaryCharsBWCompat()
+	  {
+		// Composite format strings: string.Format understands {0}, not Java's %s.
+		string missing = "Term {0} is missing in the set";
+		string falsePos = "Term {0} is in the set but shouldn't";
+		// The invariant culture stands in for Java's Locale.ROOT.
+		IFormatProvider root = System.Globalization.CultureInfo.InvariantCulture;
+		// for reference see
+		// http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
+		string[] upperArr = new string[] {"Abc\ud801\udc1c", "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"};
+		string[] lowerArr = new string[] {"abc\ud801\udc44", "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"};
+		CharArraySet set = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, true);
+		foreach (string upper in upperArr)
+		{
+		  set.add(upper);
+		}
+		for (int i = 0; i < upperArr.Length; i++)
+		{
+		  assertTrue(string.Format(root, missing, upperArr[i]), set.contains(upperArr[i]));
+		  assertFalse(string.Format(root, falsePos, lowerArr[i]), set.contains(lowerArr[i]));
+		}
+		set = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, false);
+		foreach (string upper in upperArr)
+		{
+		  set.add(upper);
+		}
+		for (int i = 0; i < upperArr.Length; i++)
+		{
+		  assertTrue(string.Format(root, missing, upperArr[i]), set.contains(upperArr[i]));
+		  assertFalse(string.Format(root, falsePos, lowerArr[i]), set.contains(lowerArr[i]));
+		}
+	  }
+
+	  /// <summary>
+	  /// Backwards-compatibility variant of testSingleHighSurrogate using
+	  /// <c>Version.LUCENE_30</c>. With <c>true</c> as the last constructor
+	  /// argument the lower-case variants are expected to match, except for the
+	  /// last entry, which also contains a surrogate pair.
+	  /// Deprecated (3.1): remove this test when lucene 3.0 "broken unicode 4"
+	  /// support is no longer needed.
+	  /// </summary>
+	  [Obsolete("(3.1) remove this test when lucene 3.0 \"broken unicode 4\" support is no longer needed.")]
+	  public virtual void testSingleHighSurrogateBWComapt()
+	  {
+		// Composite format strings: string.Format understands {0}, not Java's %s.
+		string missing = "Term {0} is missing in the set";
+		string falsePos = "Term {0} is in the set but shouldn't";
+		// The invariant culture stands in for Java's Locale.ROOT.
+		IFormatProvider root = System.Globalization.CultureInfo.InvariantCulture;
+		string[] upperArr = new string[] {"ABC\uD800", "ABC\uD800EfG", "\uD800EfG", "\uD800\ud801\udc1cB"};
+
+		string[] lowerArr = new string[] {"abc\uD800", "abc\uD800efg", "\uD800efg", "\uD800\ud801\udc44b"};
+		CharArraySet set = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, true);
+		foreach (string upper in upperArr)
+		{
+		  set.add(upper);
+		}
+		for (int i = 0; i < upperArr.Length; i++)
+		{
+		  assertTrue(string.Format(root, missing, upperArr[i]), set.contains(upperArr[i]));
+		  if (i == lowerArr.Length - 1)
+		  {
+			// last entry: the only one that also contains a surrogate pair
+			assertFalse(string.Format(root, falsePos, lowerArr[i]), set.contains(lowerArr[i]));
+		  }
+		  else
+		  {
+			assertTrue(string.Format(root, missing, lowerArr[i]), set.contains(lowerArr[i]));
+		  }
+		}
+		set = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, false);
+		foreach (string upper in upperArr)
+		{
+		  set.add(upper);
+		}
+		for (int i = 0; i < upperArr.Length; i++)
+		{
+		  assertTrue(string.Format(root, missing, upperArr[i]), set.contains(upperArr[i]));
+		  assertFalse(string.Format(root, falsePos, lowerArr[i]), set.contains(lowerArr[i]));
+		}
+	  }
+
+	  /// <summary>
+	  /// Tests the static <c>CharArraySet.copy</c> function with a CharArraySet
+	  /// as the source: the copies must have the size and case behaviour of their
+	  /// sources, and terms added to a copy must not appear in the source sets.
+	  /// (The Java original carried a @SuppressWarnings("deprecated") annotation.)
+	  /// </summary>
+	  public virtual void testCopyCharArraySetBWCompat()
+	  {
+		CharArraySet setIngoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
+		CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);
+
+		IList<string> stopwords = TEST_STOP_WORDS;
+		IList<string> stopwordsUpper = new List<string>();
+		foreach (string word in stopwords)
+		{
+		  // culture-independent upper-casing (Java: toUpperCase(Locale.ROOT))
+		  stopwordsUpper.Add(word.ToUpperInvariant());
+		}
+		setIngoreCase.addAll(TEST_STOP_WORDS);
+		setIngoreCase.add(Convert.ToInt32(1));
+		setCaseSensitive.addAll(TEST_STOP_WORDS);
+		setCaseSensitive.add(Convert.ToInt32(1));
+
+		CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, setIngoreCase);
+		CharArraySet copyCaseSens = CharArraySet.copy(TEST_VERSION_CURRENT, setCaseSensitive);
+
+		// each copy is compared with its own source
+		assertEquals(setIngoreCase.size(), copy.size());
+		assertEquals(setCaseSensitive.size(), copyCaseSens.size());
+
+		assertTrue(copy.containsAll(stopwords));
+		assertTrue(copy.containsAll(stopwordsUpper));
+		assertTrue(copyCaseSens.containsAll(stopwords));
+		foreach (string upper in stopwordsUpper)
+		{
+		  assertFalse(copyCaseSens.contains(upper));
+		}
+		// test adding terms to the copy
+		IList<string> newWords = new List<string>();
+		foreach (string word in stopwords)
+		{
+		  newWords.Add(word + "_1");
+		}
+		copy.addAll(newWords);
+
+		assertTrue(copy.containsAll(stopwords));
+		assertTrue(copy.containsAll(stopwordsUpper));
+		assertTrue(copy.containsAll(newWords));
+		// new added terms are not in the source set
+		foreach (string added in newWords)
+		{
+		  assertFalse(setIngoreCase.contains(added));
+		  assertFalse(setCaseSensitive.contains(added));
+		}
+	  }
+
+	  /// <summary>
+	  /// Test the static <c>CharArraySet.copy</c> function with a CharArraySet as
+	  /// a source: the copies must have the size and case behaviour of their
+	  /// sources, and terms added to a copy must not appear in the source sets.
+	  /// </summary>
+	  public virtual void testCopyCharArraySet()
+	  {
+		CharArraySet setIngoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
+		CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);
+
+		IList<string> stopwords = TEST_STOP_WORDS;
+		IList<string> stopwordsUpper = new List<string>();
+		foreach (string word in stopwords)
+		{
+		  // culture-independent upper-casing (Java: toUpperCase(Locale.ROOT))
+		  stopwordsUpper.Add(word.ToUpperInvariant());
+		}
+		setIngoreCase.addAll(TEST_STOP_WORDS);
+		setIngoreCase.add(Convert.ToInt32(1));
+		setCaseSensitive.addAll(TEST_STOP_WORDS);
+		setCaseSensitive.add(Convert.ToInt32(1));
+
+		CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, setIngoreCase);
+		CharArraySet copyCaseSens = CharArraySet.copy(TEST_VERSION_CURRENT, setCaseSensitive);
+
+		// each copy is compared with its own source
+		assertEquals(setIngoreCase.size(), copy.size());
+		assertEquals(setCaseSensitive.size(), copyCaseSens.size());
+
+		assertTrue(copy.containsAll(stopwords));
+		assertTrue(copy.containsAll(stopwordsUpper));
+		assertTrue(copyCaseSens.containsAll(stopwords));
+		foreach (string upper in stopwordsUpper)
+		{
+		  assertFalse(copyCaseSens.contains(upper));
+		}
+		// test adding terms to the copy
+		IList<string> newWords = new List<string>();
+		foreach (string word in stopwords)
+		{
+		  newWords.Add(word + "_1");
+		}
+		copy.addAll(newWords);
+
+		assertTrue(copy.containsAll(stopwords));
+		assertTrue(copy.containsAll(stopwordsUpper));
+		assertTrue(copy.containsAll(newWords));
+		// new added terms are not in the source set
+		foreach (string added in newWords)
+		{
+		  assertFalse(setIngoreCase.contains(added));
+		  assertFalse(setCaseSensitive.contains(added));
+		}
+	  }
+
+	  /// <summary>
+	  /// Test the static <c>CharArraySet.copy</c> function with a plain
+	  /// <see cref="ISet{T}"/> (a JDK Set in the Java original) as a source: the
+	  /// copy must have the same size, must not match the upper-cased words, and
+	  /// terms added to the copy must not appear in the source set.
+	  /// </summary>
+	  public virtual void testCopyJDKSet()
+	  {
+		ISet<string> set = new HashSet<string>();
+
+		IList<string> stopwords = TEST_STOP_WORDS;
+		IList<string> stopwordsUpper = new List<string>();
+		foreach (string word in stopwords)
+		{
+		  // culture-independent upper-casing (Java: toUpperCase(Locale.ROOT))
+		  stopwordsUpper.Add(word.ToUpperInvariant());
+		}
+		// ISet<T> has no addAll; UnionWith is the bulk-add equivalent.
+		set.UnionWith(TEST_STOP_WORDS);
+
+		CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, set);
+
+		assertEquals(set.Count, copy.size());
+
+		assertTrue(copy.containsAll(stopwords));
+		foreach (string upper in stopwordsUpper)
+		{
+		  assertFalse(copy.contains(upper));
+		}
+
+		IList<string> newWords = new List<string>();
+		foreach (string word in stopwords)
+		{
+		  newWords.Add(word + "_1");
+		}
+		copy.addAll(newWords);
+
+		assertTrue(copy.containsAll(stopwords));
+		assertTrue(copy.containsAll(newWords));
+		// new added terms are not in the source set
+		foreach (string added in newWords)
+		{
+		  assertFalse(set.Contains(added));
+		}
+	  }
+
+	  /// <summary>
+	  /// Special case of <c>CharArraySet.copy</c>: copying
+	  /// <c>CharArraySet.EMPTY_SET</c> must hand back that very same instance.
+	  /// </summary>
+	  public virtual void testCopyEmptySet()
+	  {
+		var expected = CharArraySet.EMPTY_SET;
+		var copied = CharArraySet.copy(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
+		assertSame(expected, copied);
+	  }
+
+	  /// <summary>
+	  /// Smoke test for the static empty set: size 0, reported as empty, and no
+	  /// lookup (string, object, char[] or char[] window) finds anything.
+	  /// </summary>
+	  public virtual void testEmptySet()
+	  {
+		assertEquals(0, CharArraySet.EMPTY_SET.size());
+		assertTrue(CharArraySet.EMPTY_SET.Empty);
+
+		for (int i = 0; i < TEST_STOP_WORDS.Length; i++)
+		{
+		  assertFalse(CharArraySet.EMPTY_SET.contains(TEST_STOP_WORDS[i]));
+		}
+
+		// one probe per lookup flavour
+		assertFalse(CharArraySet.EMPTY_SET.contains("foo"));
+		assertFalse(CharArraySet.EMPTY_SET.contains((object) "foo"));
+		assertFalse(CharArraySet.EMPTY_SET.contains("foo".ToCharArray()));
+		assertFalse(CharArraySet.EMPTY_SET.contains("foo".ToCharArray(),0,3));
+	  }
+
+	  /// <summary>
+	  /// Checks that each <c>contains</c> call below throws
+	  /// <see cref="System.NullReferenceException"/> (the counterpart of Java's
+	  /// NPE) when it is handed a null argument.
+	  /// </summary>
+	  public virtual void testContainsWithNull()
+	  {
+		CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
+		// null char[] with offset/length
+		try
+		{
+		  set.contains((char[]) null, 0, 10);
+		  fail("null value must raise NPE");
+		}
+		catch (System.NullReferenceException)
+		{
+		  // expected
+		}
+		// null character sequence
+		// NOTE(review): CharSequence is a Java type name - confirm what this cast
+		// should target in the .NET port.
+		try
+		{
+		  set.contains((CharSequence) null);
+		  fail("null value must raise NPE");
+		}
+		catch (System.NullReferenceException)
+		{
+		  // expected
+		}
+		// null object
+		try
+		{
+		  set.contains((object) null);
+		  fail("null value must raise NPE");
+		}
+		catch (System.NullReferenceException)
+		{
+		  // expected
+		}
+	  }
+
+	  /// <summary>
+	  /// Checks the string form of a set: "[test]" for a single entry, and a
+	  /// ", " separator once a second entry is added - for both
+	  /// TEST_VERSION_CURRENT and Version.LUCENE_30.
+	  /// </summary>
+	  public virtual void testToString()
+	  {
+		// A one-element HashSet replaces Java's Collections.singleton("test").
+		CharArraySet set = CharArraySet.copy(TEST_VERSION_CURRENT, new HashSet<string> {"test"});
+		assertEquals("[test]", set.ToString());
+		set.add("test2");
+		assertTrue(set.ToString().Contains(", "));
+
+		set = CharArraySet.copy(Version.LUCENE_30, new HashSet<string> {"test"});
+		assertEquals("[test]", set.ToString());
+		set.add("test2");
+		assertTrue(set.ToString().Contains(", "));
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharTokenizers.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharTokenizers.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharTokenizers.cs
new file mode 100644
index 0000000..3581c7b
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharTokenizers.cs
@@ -0,0 +1,268 @@
+using System.Text;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LetterTokenizer = org.apache.lucene.analysis.core.LetterTokenizer;
+	using LowerCaseTokenizer = org.apache.lucene.analysis.core.LowerCaseTokenizer;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using TestUtil = org.apache.lucene.util.TestUtil;
+
+
+	/// <summary>
+	/// Testcase for <seealso cref="CharTokenizer"/> subclasses
+	/// </summary>
+	public class TestCharTokenizers : BaseTokenStreamTestCase
+	{
+
+	  /// <summary>
+	  /// Test to read surrogate pairs without losing the pairing
+	  /// if the surrogate pair is at the border of the internal IO buffer.
+	  /// </summary>
+	  public virtual void testReadSupplementaryChars()
+	  {
+		StringBuilder builder = new StringBuilder();
+		// create random input
+		int num = 1024 + random().Next(1024);
+		num *= RANDOM_MULTIPLIER;
+		for (int i = 1; i < num; i++)
+		{
+		  builder.Append("\ud801\udc1cabc");
+		  if ((i % 10) == 0)
+		  {
+			builder.Append(" ");
+		  }
+		}
+		// internal buffer size is 1024 make sure we have a surrogate pair right at the border
+		builder.Insert(1023, "\ud801\udc1c");
+		Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.ToString()));
+		// Java's String.split(" ") silently drops trailing empty strings, which
+		// matters when the input ends with a blank. The input never contains
+		// leading or consecutive blanks, so RemoveEmptyEntries yields the same tokens.
+		// NOTE(review): confirm that ToLowerInvariant lower-cases the supplementary
+		// character U+1041C on the target runtime the way Java's toLowerCase does.
+		string[] expected = builder.ToString().ToLowerInvariant().Split(new char[] {' '}, System.StringSplitOptions.RemoveEmptyEntries);
+		assertTokenStreamContents(tokenizer, expected);
+	  }
+
+	  /// <summary>
+	  /// Test to extend the TermAttribute buffer internally. If the internal
+	  /// alg that extends the size of the char array only extends by 1 char and the
+	  /// next char to be filled in is a supplementary codepoint (using 2 chars) an
+	  /// index out of bound exception is triggered.
+	  /// </summary>
+	  public virtual void testExtendCharBuffer()
+	  {
+		for (int i = 0; i < 40; i++)
+		{
+		  StringBuilder builder = new StringBuilder();
+		  // 1..40 leading 'a' chars, so the surrogate pair lands on varying offsets
+		  for (int j = 0; j < 1 + i; j++)
+		  {
+			builder.Append("a");
+		  }
+		  builder.Append("\ud801\udc1cabc");
+		  Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.ToString()));
+		  // culture-independent lower-casing (Java: toLowerCase(Locale.ROOT))
+		  // NOTE(review): confirm that ToLowerInvariant lower-cases the supplementary
+		  // character U+1041C on the target runtime the way Java's toLowerCase does.
+		  assertTokenStreamContents(tokenizer, new string[] {builder.ToString().ToLowerInvariant()});
+		}
+	  }
+
+	  /// <summary>
+	  /// Tests the max word length of 255 - tokenizer will split at the 255 char
+	  /// no matter what happens: 510 consecutive 'A' chars must come back as two
+	  /// lower-cased tokens of 255 chars each.
+	  /// </summary>
+	  public virtual void testMaxWordLength()
+	  {
+		StringBuilder builder = new StringBuilder();
+
+		for (int i = 0; i < 255; i++)
+		{
+		  builder.Append("A");
+		}
+		Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.ToString() + builder.ToString()));
+		// culture-independent lower-casing (Java: toLowerCase(Locale.ROOT))
+		assertTokenStreamContents(tokenizer, new string[] {builder.ToString().ToLowerInvariant(), builder.ToString().ToLowerInvariant()});
+	  }
+
+	  /// <summary>
+	  /// Tests the max word length of 255 with a surrogate pair at position 255:
+	  /// 254 'A' chars followed by one surrogate pair, fed twice back to back,
+	  /// must come back as two lower-cased tokens.
+	  /// </summary>
+	  public virtual void testMaxWordLengthWithSupplementary()
+	  {
+		StringBuilder builder = new StringBuilder();
+
+		for (int i = 0; i < 254; i++)
+		{
+		  builder.Append("A");
+		}
+		builder.Append("\ud801\udc1c");
+		Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.ToString() + builder.ToString()));
+		// culture-independent lower-casing (Java: toLowerCase(Locale.ROOT))
+		// NOTE(review): confirm that ToLowerInvariant lower-cases the supplementary
+		// character U+1041C on the target runtime the way Java's toLowerCase does.
+		assertTokenStreamContents(tokenizer, new string[] {builder.ToString().ToLowerInvariant(), builder.ToString().ToLowerInvariant()});
+	  }
+
+	  /// <summary>
+	  /// LUCENE-3642: normalize SMP to BMP and check that offsets are correct.
+	  /// For random unicode input, every code point of the original text covered
+	  /// by a token's start/end offsets must be a letter.
+	  /// </summary>
+	  public virtual void testCrossPlaneNormalization()
+	  {
+		Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);
+		int num = 1000 * RANDOM_MULTIPLIER;
+		for (int i = 0; i < num; i++)
+		{
+		  string s = TestUtil.randomUnicodeString(random());
+		  TokenStream ts = analyzer.tokenStream("foo", s);
+		  try
+		  {
+			ts.reset();
+			OffsetAttribute offsetAtt = ts.addAttribute(typeof(OffsetAttribute));
+			while (ts.incrementToken())
+			{
+			  string highlightedText = StringHelperClass.SubstringSpecial(s, offsetAtt.startOffset(), offsetAtt.endOffset());
+			  int j = 0;
+			  while (j < highlightedText.Length)
+			  {
+				// A well-formed surrogate pair is one code point spanning two chars.
+				// Anything else, including a lone surrogate, is treated as a single
+				// char, because char.ConvertToUtf32 would throw on it.
+				bool isPair = char.IsSurrogatePair(highlightedText, j);
+				int cp = isPair ? char.ConvertToUtf32(highlightedText, j) : highlightedText[j];
+				assertTrue("non-letter:" + cp.ToString("x"), char.IsLetter(highlightedText, j));
+				j += isPair ? 2 : 1;
+			  }
+			}
+			ts.end();
+		  }
+		  finally
+		  {
+			IOUtils.closeWhileHandlingException(ts);
+		  }
+		}
+		// just for fun
+		checkRandomData(random(), analyzer, num);
+	  }
+
+	  /// <summary>
+	  /// Analyzer for testCrossPlaneNormalization (an anonymous inner class in the
+	  /// Java original). Its tokenizer is a LetterTokenizer whose
+	  /// <c>normalize</c> maps every code point above 0xffff to 'δ'.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
+	  {
+		  // Reference to the enclosing test instance, kept from the Java original.
+		  private readonly TestCharTokenizers outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper(TestCharTokenizers outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer tokenizer = new LetterTokenizerAnonymousInnerClassHelper(this, TEST_VERSION_CURRENT, reader);
+			return new TokenStreamComponents(tokenizer, tokenizer);
+		  }
+
+		  /// <summary>
+		  /// LetterTokenizer that replaces supplementary code points by 'δ'.
+		  /// </summary>
+		  private class LetterTokenizerAnonymousInnerClassHelper : LetterTokenizer
+		  {
+			  private readonly AnalyzerAnonymousInnerClassHelper outerInstance;
+
+			  // The version parameter was emitted as the converter placeholder
+			  // "UnknownType"; it is the Lucene Version. It is renamed so that it no
+			  // longer shadows the static TEST_VERSION_CURRENT.
+			  public LetterTokenizerAnonymousInnerClassHelper(AnalyzerAnonymousInnerClassHelper outerInstance, org.apache.lucene.util.Version matchVersion, Reader reader) : base(matchVersion, reader)
+			  {
+				  this.outerInstance = outerInstance;
+			  }
+
+			  protected internal override int normalize(int c)
+			  {
+				if (c > 0xffff)
+				{
+				  // supplementary code point: collapse to a BMP letter
+				  return 'δ';
+				}
+				else
+				{
+				  return c;
+				}
+			  }
+		  }
+	  }
+
+	  /// <summary>
+	  /// LUCENE-3642: normalize BMP to SMP and check that offsets are correct.
+	  /// For random unicode input, every code point of the original text covered
+	  /// by a token's start/end offsets must be a letter.
+	  /// </summary>
+	  public virtual void testCrossPlaneNormalization2()
+	  {
+		Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);
+		int num = 1000 * RANDOM_MULTIPLIER;
+		for (int i = 0; i < num; i++)
+		{
+		  string s = TestUtil.randomUnicodeString(random());
+		  TokenStream ts = analyzer.tokenStream("foo", s);
+		  try
+		  {
+			ts.reset();
+			OffsetAttribute offsetAtt = ts.addAttribute(typeof(OffsetAttribute));
+			while (ts.incrementToken())
+			{
+			  string highlightedText = StringHelperClass.SubstringSpecial(s, offsetAtt.startOffset(), offsetAtt.endOffset());
+			  int j = 0;
+			  while (j < highlightedText.Length)
+			  {
+				// A well-formed surrogate pair is one code point spanning two chars.
+				// Anything else, including a lone surrogate, is treated as a single
+				// char, because char.ConvertToUtf32 would throw on it.
+				bool isPair = char.IsSurrogatePair(highlightedText, j);
+				int cp = isPair ? char.ConvertToUtf32(highlightedText, j) : highlightedText[j];
+				assertTrue("non-letter:" + cp.ToString("x"), char.IsLetter(highlightedText, j));
+				j += isPair ? 2 : 1;
+			  }
+			}
+			ts.end();
+		  }
+		  finally
+		  {
+			IOUtils.closeWhileHandlingException(ts);
+		  }
+		}
+		// just for fun
+		checkRandomData(random(), analyzer, num);
+	  }
+
+	  /// <summary>
+	  /// Analyzer for testCrossPlaneNormalization2 (an anonymous inner class in
+	  /// the Java original). Its tokenizer is a LetterTokenizer whose
+	  /// <c>normalize</c> maps every code point up to 0xffff to 0x1043C.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+	  {
+		  // Reference to the enclosing test instance, kept from the Java original.
+		  private readonly TestCharTokenizers outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper2(TestCharTokenizers outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer tokenizer = new LetterTokenizerAnonymousInnerClassHelper2(this, TEST_VERSION_CURRENT, reader);
+			return new TokenStreamComponents(tokenizer, tokenizer);
+		  }
+
+		  /// <summary>
+		  /// LetterTokenizer that replaces BMP code points by the supplementary
+		  /// code point 0x1043C.
+		  /// </summary>
+		  private class LetterTokenizerAnonymousInnerClassHelper2 : LetterTokenizer
+		  {
+			  private readonly AnalyzerAnonymousInnerClassHelper2 outerInstance;
+
+			  // The version parameter was emitted as the converter placeholder
+			  // "UnknownType"; it is the Lucene Version. It is renamed so that it no
+			  // longer shadows the static TEST_VERSION_CURRENT.
+			  public LetterTokenizerAnonymousInnerClassHelper2(AnalyzerAnonymousInnerClassHelper2 outerInstance, org.apache.lucene.util.Version matchVersion, Reader reader) : base(matchVersion, reader)
+			  {
+				  this.outerInstance = outerInstance;
+			  }
+
+			  protected internal override int normalize(int c)
+			  {
+				if (c <= 0xffff)
+				{
+				  // BMP code point: expand to a supplementary code point
+				  return 0x1043C;
+				}
+				else
+				{
+				  return c;
+				}
+			  }
+		  }
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharacterUtils.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharacterUtils.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharacterUtils.cs
new file mode 100644
index 0000000..fb0fd6e
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharacterUtils.cs
@@ -0,0 +1,290 @@
+using System;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharacterBuffer = org.apache.lucene.analysis.util.CharacterUtils.CharacterBuffer;
+	using LuceneTestCase = org.apache.lucene.util.LuceneTestCase;
+	using Version = org.apache.lucene.util.Version;
+	using TestUtil = org.apache.lucene.util.TestUtil;
+	using Test = org.junit.Test;
+
+	/// <summary>
+	/// TestCase for the <seealso cref="CharacterUtils"/> class.
+	/// </summary>
+	public class TestCharacterUtils : LuceneTestCase
+	{
+
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @Test public void testCodePointAtCharSequenceInt()
+	  /// <summary>
+	  /// Checks <c>codePointAt(string, int)</c> on the <c>Version.LUCENE_30</c> instance and on the
+	  /// <c>TEST_VERSION_CURRENT</c> instance. The former is expected to return the raw high surrogate at
+	  /// index 3; the latter is expected to return the combined code point when a low surrogate follows.
+	  /// Both are expected to throw <c>IndexOutOfRangeException</c> for an index past the end.
+	  /// </summary>
+	  public virtual void testCodePointAtCharSequenceInt()
+	  {
+		CharacterUtils java4 = CharacterUtils.getInstance(Version.LUCENE_30);
+		string cpAt3 = "Abc\ud801\udc1c";
+		string highSurrogateAt3 = "Abc\ud801";
+		assertEquals((int) 'A', java4.codePointAt(cpAt3, 0));
+		assertEquals((int) '\ud801', java4.codePointAt(cpAt3, 3));
+		assertEquals((int) '\ud801', java4.codePointAt(highSurrogateAt3, 3));
+		try
+		{
+		  java4.codePointAt(highSurrogateAt3, 4);
+		  fail("string index out of bounds");
+		}
+		catch (System.IndexOutOfRangeException)
+		{
+		  // expected: index 4 is past the end of the 4-char string
+		}
+
+		CharacterUtils java5 = CharacterUtils.getInstance(TEST_VERSION_CURRENT);
+		assertEquals((int) 'A', java5.codePointAt(cpAt3, 0));
+		// char.ConvertToUtf32(high, low) is the .NET counterpart of Java's Character.toCodePoint.
+		assertEquals(char.ConvertToUtf32('\ud801', '\udc1c'), java5.codePointAt(cpAt3, 3));
+		assertEquals((int) '\ud801', java5.codePointAt(highSurrogateAt3, 3));
+		try
+		{
+		  java5.codePointAt(highSurrogateAt3, 4);
+		  fail("string index out of bounds");
+		}
+		catch (System.IndexOutOfRangeException)
+		{
+		  // expected: index 4 is past the end of the 4-char string
+		}
+
+	  }
+
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @Test public void testCodePointAtCharArrayIntInt()
+	  /// <summary>
+	  /// Checks <c>codePointAt(char[], int, int)</c> on the <c>Version.LUCENE_30</c> instance and on the
+	  /// <c>TEST_VERSION_CURRENT</c> instance, using the same inputs as the string-based test.
+	  /// </summary>
+	  public virtual void testCodePointAtCharArrayIntInt()
+	  {
+		CharacterUtils java4 = CharacterUtils.getInstance(Version.LUCENE_30);
+		char[] cpAt3 = "Abc\ud801\udc1c".ToCharArray();
+		char[] highSurrogateAt3 = "Abc\ud801".ToCharArray();
+		assertEquals((int) 'A', java4.codePointAt(cpAt3, 0, 2));
+		assertEquals((int) '\ud801', java4.codePointAt(cpAt3, 3, 5));
+		assertEquals((int) '\ud801', java4.codePointAt(highSurrogateAt3, 3, 4));
+
+		CharacterUtils java5 = CharacterUtils.getInstance(TEST_VERSION_CURRENT);
+		assertEquals((int) 'A', java5.codePointAt(cpAt3, 0, 2));
+		// char.ConvertToUtf32(high, low) is the .NET counterpart of Java's Character.toCodePoint.
+		assertEquals(char.ConvertToUtf32('\ud801', '\udc1c'), java5.codePointAt(cpAt3, 3, 5));
+		assertEquals((int) '\ud801', java5.codePointAt(highSurrogateAt3, 3, 4));
+	  }
+
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @Test public void testCodePointCount()
+	  /// <summary>
+	  /// Checks <c>codePointCount</c> on a random Unicode string: the Java 4 instance is expected to report
+	  /// the number of UTF-16 chars, the current-version instance the number of code points.
+	  /// </summary>
+	  public virtual void testCodePointCount()
+	  {
+		CharacterUtils java4 = CharacterUtils.Java4Instance;
+		CharacterUtils java5 = CharacterUtils.getInstance(TEST_VERSION_CURRENT);
+		string s = TestUtil.randomUnicodeString(random());
+		assertEquals(s.Length, java4.codePointCount(s));
+		// .NET has no Character.codePointCount; count code points with the helper below.
+		assertEquals(CountCodePoints(s), java5.codePointCount(s));
+	  }
+
+	  /// <summary>
+	  /// Counts the code points in <paramref name="text"/>: a high surrogate immediately followed by a low
+	  /// surrogate counts once, every other char (including an unpaired surrogate) counts once.
+	  /// </summary>
+	  private static int CountCodePoints(string text)
+	  {
+		int count = 0;
+		for (int i = 0; i < text.Length; i++)
+		{
+		  if (char.IsSurrogatePair(text, i))
+		  {
+			i++; // step over the low surrogate of the pair
+		  }
+		  count++;
+		}
+		return count;
+	  }
+
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @Test public void testOffsetByCodePoint()
+	  /// <summary>
+	  /// Checks <c>offsetByCodePoints</c> on ten random inputs. The Java 4 instance is expected to move by
+	  /// plain char offsets; the current-version instance is compared against a reference implementation
+	  /// of Java's <c>Character.offsetByCodePoints</c>.
+	  /// </summary>
+	  public virtual void testOffsetByCodePoint()
+	  {
+		CharacterUtils java4 = CharacterUtils.Java4Instance;
+		CharacterUtils java5 = CharacterUtils.getInstance(TEST_VERSION_CURRENT);
+		for (int i = 0; i < 10; ++i)
+		{
+		  char[] s = TestUtil.randomUnicodeString(random()).ToCharArray();
+		  int index = TestUtil.Next(random(), 0, s.Length);
+		  int offset = random().Next(7) - 3;
+		  try
+		  {
+			// Named differently from the outer 'o' below: C# forbids reusing a name that an
+			// enclosing scope declares later in the same block.
+			int java4Offset = java4.offsetByCodePoints(s, 0, s.Length, index, offset);
+			assertEquals(java4Offset, index + offset);
+		  }
+		  catch (System.IndexOutOfRangeException)
+		  {
+			assertTrue((index + offset) < 0 || (index + offset) > s.Length);
+		  }
+
+		  int o;
+		  try
+		  {
+			o = java5.offsetByCodePoints(s, 0, s.Length, index, offset);
+		  }
+		  catch (System.IndexOutOfRangeException)
+		  {
+			try
+			{
+			  // The reference implementation must reject the same arguments.
+			  OffsetByCodePoints(s, 0, s.Length, index, offset);
+			  fail();
+			}
+			catch (System.IndexOutOfRangeException)
+			{
+			  // OK
+			}
+			o = -1;
+		  }
+		  if (o >= 0)
+		  {
+			assertEquals(OffsetByCodePoints(s, 0, s.Length, index, offset), o);
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Reference port of Java's <c>Character.offsetByCodePoints(char[], int, int, int, int)</c>, which has
+	  /// no .NET equivalent. Returns the index reached by moving <paramref name="codePointOffset"/> code
+	  /// points from <paramref name="index"/> within <c>a[start .. start + count)</c>.
+	  /// </summary>
+	  /// <exception cref="System.IndexOutOfRangeException">
+	  /// The arguments are out of range, or the sub-array holds too few code points in the requested direction.
+	  /// </exception>
+	  private static int OffsetByCodePoints(char[] a, int start, int count, int index, int codePointOffset)
+	  {
+		int limit = start + count;
+		if (start < 0 || count < 0 || count > a.Length - start || index < start || index > limit)
+		{
+		  throw new System.IndexOutOfRangeException();
+		}
+
+		int pos = index;
+		if (codePointOffset >= 0)
+		{
+		  int moved = 0;
+		  while (pos < limit && moved < codePointOffset)
+		  {
+			// A high surrogate followed by a low surrogate is one code point made of two chars.
+			if (char.IsHighSurrogate(a[pos++]) && pos < limit && char.IsLowSurrogate(a[pos]))
+			{
+			  pos++;
+			}
+			moved++;
+		  }
+		  if (moved < codePointOffset)
+		  {
+			throw new System.IndexOutOfRangeException();
+		  }
+		}
+		else
+		{
+		  int remaining = codePointOffset;
+		  while (pos > start && remaining < 0)
+		  {
+			// Walking backwards: a low surrogate preceded by a high surrogate is one code point.
+			if (char.IsLowSurrogate(a[--pos]) && pos > start && char.IsHighSurrogate(a[pos - 1]))
+			{
+			  pos--;
+			}
+			remaining++;
+		  }
+		  if (remaining < 0)
+		  {
+			throw new System.IndexOutOfRangeException();
+		  }
+		}
+		return pos;
+	  }
+
+	  /// <summary>
+	  /// Runs the code point / char round-trip check against the Java 4 instance and then against the
+	  /// <c>TEST_VERSION_CURRENT</c> instance.
+	  /// </summary>
+	  public virtual void testConversions()
+	  {
+		CharacterUtils[] instances = new CharacterUtils[] {CharacterUtils.Java4Instance, CharacterUtils.getInstance(TEST_VERSION_CURRENT)};
+		foreach (CharacterUtils charUtils in instances)
+		{
+		  testConversions(charUtils);
+		}
+	  }
+
+	  /// <summary>
+	  /// Converts a random char array to code points and back with <paramref name="charUtils"/>, using
+	  /// random start offsets, and asserts that the restored chars equal the converted slice of the input.
+	  /// </summary>
+	  /// <param name="charUtils">The instance whose <c>toCodePoints</c> / <c>toChars</c> are exercised.</param>
+	  private void testConversions(CharacterUtils charUtils)
+	  {
+		char[] orig = TestUtil.randomUnicodeString(random(), 100).ToCharArray();
+		int[] buf = new int[orig.Length];
+		char[] restored = new char[buf.Length];
+		int o1 = TestUtil.Next(random(), 0, Math.Min(5, orig.Length));
+		int o2 = TestUtil.Next(random(), 0, o1);
+		int o3 = TestUtil.Next(random(), 0, o1);
+		int codePointCount = charUtils.toCodePoints(orig, o1, orig.Length - o1, buf, o2);
+		int charCount = charUtils.toChars(buf, o2, codePointCount, restored, o3);
+		assertEquals(orig.Length - o1, charCount);
+
+		// Array.Copy replaces Java's Arrays.copyOfRange. Both ranges are in bounds here because the
+		// assertion above guarantees o1 + charCount == orig.Length and o3 is never larger than o1.
+		char[] expectedSlice = new char[charCount];
+		Array.Copy(orig, o1, expectedSlice, 0, charCount);
+		char[] restoredSlice = new char[charCount];
+		Array.Copy(restored, o3, restoredSlice, 0, charCount);
+		assertArrayEquals(expectedSlice, restoredSlice);
+	  }
+
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @Test public void testNewCharacterBuffer()
+	  /// <summary>
+	  /// Checks that <c>CharacterUtils.newCharacterBuffer</c> allocates a buffer of the requested size with
+	  /// zero offset and length, and that a size of 1 is rejected with an <c>ArgumentException</c>.
+	  /// </summary>
+	  public virtual void testNewCharacterBuffer()
+	  {
+		CharacterBuffer newCharacterBuffer = CharacterUtils.newCharacterBuffer(1024);
+		// Buffer is a char[]; .NET arrays expose Length, not Java's length.
+		assertEquals(1024, newCharacterBuffer.Buffer.Length);
+		assertEquals(0, newCharacterBuffer.Offset);
+		assertEquals(0, newCharacterBuffer.Length);
+
+		newCharacterBuffer = CharacterUtils.newCharacterBuffer(2);
+		assertEquals(2, newCharacterBuffer.Buffer.Length);
+		assertEquals(0, newCharacterBuffer.Offset);
+		assertEquals(0, newCharacterBuffer.Length);
+
+		try
+		{
+		  newCharacterBuffer = CharacterUtils.newCharacterBuffer(1);
+		  fail("length must be >= 2");
+		}
+		catch (System.ArgumentException)
+		{
+		  // expected: a one-char buffer is not allowed
+		}
+	  }
+
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @Test public void testFillNoHighSurrogate() throws java.io.IOException
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+	  /// <summary>
+	  /// For both the <c>Version.LUCENE_30</c> and the <c>TEST_VERSION_CURRENT</c> instance, fills a
+	  /// six-char buffer from "helloworld" and checks the two chunks and the fill return values.
+	  /// </summary>
+	  public virtual void testFillNoHighSurrogate()
+	  {
+		Version[] versions = new Version[] {Version.LUCENE_30, TEST_VERSION_CURRENT};
+		for (int v = 0; v < versions.Length; v++)
+		{
+		  CharacterUtils utils = CharacterUtils.getInstance(versions[v]);
+		  Reader source = new StringReader("helloworld");
+		  CharacterBuffer chunk = CharacterUtils.newCharacterBuffer(6);
+
+		  // First fill: the whole six-char buffer is used.
+		  assertTrue(utils.fill(chunk, source));
+		  assertEquals(0, chunk.Offset);
+		  assertEquals(6, chunk.Length);
+		  assertEquals("hellow", new string(chunk.Buffer));
+
+		  // Second fill: only the remaining four chars arrive.
+		  assertFalse(utils.fill(chunk, source));
+		  assertEquals(4, chunk.Length);
+		  assertEquals(0, chunk.Offset);
+		  assertEquals("orld", new string(chunk.Buffer, chunk.Offset, chunk.Length));
+
+		  // Third fill: the reader is exhausted.
+		  assertFalse(utils.fill(chunk, source));
+		}
+	  }
+
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @Test public void testFillJava15() throws java.io.IOException
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+	  /// <summary>
+	  /// Fills a five-char buffer repeatedly with the <c>TEST_VERSION_CURRENT</c> instance from an input
+	  /// containing surrogate pairs and unpaired high surrogates, and checks each chunk's length and text.
+	  /// </summary>
+	  public virtual void testFillJava15()
+	  {
+		string text = "1234\ud801\udc1c789123\ud801\ud801\udc1c\ud801";
+		CharacterUtils utils = CharacterUtils.getInstance(TEST_VERSION_CURRENT);
+		Reader source = new StringReader(text);
+		CharacterBuffer chunk = CharacterUtils.newCharacterBuffer(5);
+
+		// Chunk 1: four chars, the surrogate pair that follows is not split.
+		assertTrue(utils.fill(chunk, source));
+		assertEquals(4, chunk.Length);
+		assertEquals("1234", new string(chunk.Buffer, chunk.Offset, chunk.Length));
+
+		// Chunk 2: the pair plus three digits fills the buffer completely.
+		assertTrue(utils.fill(chunk, source));
+		assertEquals(5, chunk.Length);
+		assertEquals("\ud801\udc1c789", new string(chunk.Buffer));
+
+		// Chunk 3: three digits and a high surrogate.
+		assertTrue(utils.fill(chunk, source));
+		assertEquals(4, chunk.Length);
+		assertEquals("123\ud801", new string(chunk.Buffer, chunk.Offset, chunk.Length));
+
+		// Chunk 4: the last three chars; fill reports false.
+		assertFalse(utils.fill(chunk, source));
+		assertEquals(3, chunk.Length);
+		assertEquals("\ud801\udc1c\ud801", new string(chunk.Buffer, chunk.Offset, chunk.Length));
+
+		// Nothing left to read.
+		assertFalse(utils.fill(chunk, source));
+		assertEquals(0, chunk.Length);
+	  }
+
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @Test public void testFillJava14() throws java.io.IOException
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+	  /// <summary>
+	  /// Fills buffers with the <c>Version.LUCENE_30</c> instance from an input containing surrogates and
+	  /// checks that each chunk is cut purely by char count, even in the middle of a surrogate pair.
+	  /// </summary>
+	  public virtual void testFillJava14()
+	  {
+		string text = "1234\ud801\udc1c789123\ud801\ud801\udc1c\ud801";
+		CharacterUtils utils = CharacterUtils.getInstance(Version.LUCENE_30);
+		Reader source = new StringReader(text);
+		CharacterBuffer chunk = CharacterUtils.newCharacterBuffer(5);
+
+		// Chunk 1: five chars, ending on the high surrogate of a pair.
+		assertTrue(utils.fill(chunk, source));
+		assertEquals(5, chunk.Length);
+		assertEquals("1234\ud801", new string(chunk.Buffer, chunk.Offset, chunk.Length));
+
+		// Chunk 2: starts with the orphaned low surrogate.
+		assertTrue(utils.fill(chunk, source));
+		assertEquals(5, chunk.Length);
+		assertEquals("\udc1c7891", new string(chunk.Buffer));
+
+		// Chunk 3: a fresh six-char buffer takes the remaining six chars.
+		chunk = CharacterUtils.newCharacterBuffer(6);
+		assertTrue(utils.fill(chunk, source));
+		assertEquals(6, chunk.Length);
+		assertEquals("23\ud801\ud801\udc1c\ud801", new string(chunk.Buffer, chunk.Offset, chunk.Length));
+
+		// Nothing left to read.
+		assertFalse(utils.fill(chunk, source));
+
+	  }
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestElision.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestElision.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestElision.cs
new file mode 100644
index 0000000..4a53a04
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestElision.cs
@@ -0,0 +1,89 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+	using FrenchAnalyzer = org.apache.lucene.analysis.fr.FrenchAnalyzer;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// Tests for <seealso cref="ElisionFilter"/>.
+	/// </summary>
+	public class TestElision : BaseTokenStreamTestCase
+	{
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testElision() throws Exception
+	  /// <summary>
+	  /// Tokenizes a French sentence, applies an <c>ElisionFilter</c> configured with the articles
+	  /// "l" and "M", and checks three of the resulting terms.
+	  /// </summary>
+	  public virtual void testElision()
+	  {
+		string test = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin.";
+		Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(test));
+		CharArraySet articles = new CharArraySet(TEST_VERSION_CURRENT, asSet("l", "M"), false);
+		// The local must not be called 'filter': in C# a local hides the method of the same name,
+		// so 'filter(filter)' would try to invoke the variable.
+		TokenFilter elisionFilter = new ElisionFilter(tokenizer, articles);
+		IList<string> tas = filter(elisionFilter);
+		assertEquals("embrouille", tas[4]);
+		assertEquals("O'brian", tas[6]);
+		assertEquals("enfin", tas[7]);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private java.util.List<String> filter(org.apache.lucene.analysis.TokenFilter filter) throws java.io.IOException
+	  /// <summary>
+	  /// Consumes <paramref name="filter"/> from reset to close and returns the text of every token it emits.
+	  /// </summary>
+	  /// <param name="filter">The token filter to drain; it is closed before returning.</param>
+	  /// <returns>The emitted terms, in order.</returns>
+	  private IList<string> filter(TokenFilter filter)
+	  {
+		IList<string> tas = new List<string>();
+		CharTermAttribute termAtt = filter.getAttribute(typeof(CharTermAttribute));
+		filter.reset();
+		while (filter.incrementToken())
+		{
+		  tas.Add(termAtt.ToString());
+		}
+		filter.end();
+		filter.close();
+		return tas;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testEmptyTerm() throws java.io.IOException
+	  /// <summary>
+	  /// Checks that an empty input produces a single empty term through the keyword tokenizer + elision chain.
+	  /// </summary>
+	  public virtual void testEmptyTerm()
+	  {
+		Analyzer a = new AnalyzerAnonymousInnerClassHelper(this);
+		checkOneTerm(a, "", "");
+	  }
+
+	  /// <summary>
+	  /// Analyzer that feeds a <c>KeywordTokenizer</c> into an <c>ElisionFilter</c> using
+	  /// <c>FrenchAnalyzer.DEFAULT_ARTICLES</c>.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
+	  {
+		  private readonly TestElision outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper(TestElision outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer tokenizer = new KeywordTokenizer(reader);
+			return new TokenStreamComponents(tokenizer, new ElisionFilter(tokenizer, FrenchAnalyzer.DEFAULT_ARTICLES));
+		  }
+	  }
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestElisionFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestElisionFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestElisionFilterFactory.cs
new file mode 100644
index 0000000..c349fa5
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestElisionFilterFactory.cs
@@ -0,0 +1,85 @@
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+
+	/// <summary>
+	/// Simple tests to ensure the French elision filter factory is working.
+	/// </summary>
+	public class TestElisionFilterFactory : BaseTokenStreamFactoryTestCase
+	{
+	  /// <summary>
+	  /// Ensure the filter actually normalizes text.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testElision() throws Exception
+	  public virtual void testElision()
+	  {
+		Reader reader = new StringReader("l'avion");
+		TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+		stream = tokenFilterFactory("Elision", "articles", "frenchArticles.txt").create(stream);
+		assertTokenStreamContents(stream, new string[] {"avion"});
+	  }
+
+	  /// <summary>
+	  /// Test creating an elision filter without specifying any articles
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testDefaultArticles() throws Exception
+	  public virtual void testDefaultArticles()
+	  {
+		Reader reader = new StringReader("l'avion");
+		TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+		stream = tokenFilterFactory("Elision").create(stream);
+		assertTokenStreamContents(stream, new string[] {"avion"});
+	  }
+
+	  /// <summary>
+	  /// Test setting ignoreCase=true
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testCaseInsensitive() throws Exception
+	  public virtual void testCaseInsensitive()
+	  {
+		Reader reader = new StringReader("L'avion");
+		TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+		stream = tokenFilterFactory("Elision", "articles", "frenchArticles.txt", "ignoreCase", "true").create(stream);
+		assertTokenStreamContents(stream, new string[] {"avion"});
+	  }
+
+	  /// <summary>
+	  /// Test that bogus arguments result in exception </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBogusArguments() throws Exception
+	  public virtual void testBogusArguments()
+	  {
+		try
+		{
+		  tokenFilterFactory("Elision", "bogusArg", "bogusValue");
+		  fail();
+		}
+		catch (System.ArgumentException expected)
+		{
+		  // System.String exposes Contains (capital C); Java's contains does not exist in .NET.
+		  assertTrue(expected.Message.Contains("Unknown parameters"));
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestFilesystemResourceLoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestFilesystemResourceLoader.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestFilesystemResourceLoader.cs
new file mode 100644
index 0000000..b2e562e
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestFilesystemResourceLoader.cs
@@ -0,0 +1,121 @@
+using System;
+using System.Text;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using LuceneTestCase = org.apache.lucene.util.LuceneTestCase;
+	using TestUtil = org.apache.lucene.util.TestUtil;
+	using TestUtil = org.apache.lucene.util.TestUtil;
+
+	public class TestFilesystemResourceLoader : LuceneTestCase
+	{
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void assertNotFound(ResourceLoader rl) throws Exception
+	  /// <summary>
+	  /// Asserts that <paramref name="rl"/> can neither open a resource under a non-existent directory
+	  /// nor instantiate a non-existent class.
+	  /// </summary>
+	  /// <param name="rl">The resource loader under test.</param>
+	  private void assertNotFound(ResourceLoader rl)
+	  {
+		try
+		{
+		  IOUtils.closeWhileHandlingException(rl.openResource("/this-directory-really-really-really-should-not-exist/foo/bar.txt"));
+		  fail("The resource does not exist, should fail!");
+		}
+		// Fully qualified: this file does not import System.IO.
+		catch (System.IO.IOException)
+		{
+		  // pass
+		}
+
+		// fail() must be called outside the try: catch (Exception) would otherwise swallow the
+		// failure it raises and this check could never fail.
+		bool instantiated = false;
+		try
+		{
+		  rl.newInstance("org.apache.lucene.analysis.FooBarFilterFactory", typeof(TokenFilterFactory));
+		  instantiated = true;
+		}
+		catch (Exception)
+		{
+		  // pass
+		}
+		if (instantiated)
+		{
+		  fail("The class does not exist, should fail!");
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void assertClasspathDelegation(ResourceLoader rl) throws Exception
+	  /// <summary>
+	  /// Asserts that <paramref name="rl"/> can also serve resources and classes that are not on the
+	  /// file system: a Snowball stop-word file, the <c>RollingCharBuffer</c> class, and a class file.
+	  /// </summary>
+	  /// <param name="rl">The resource loader under test.</param>
+	  private void assertClasspathDelegation(ResourceLoader rl)
+	  {
+		// try a stopwords file from classpath
+		CharArraySet set = WordlistLoader.getSnowballWordSet(new System.IO.StreamReader(rl.openResource("org/apache/lucene/analysis/snowball/english_stop.txt"), Encoding.UTF8), TEST_VERSION_CURRENT);
+		assertTrue(set.contains("you"));
+		// try to load a class; we use string comparison because classloader may be different...
+//JAVA TO C# CONVERTER WARNING: The .NET Type.FullName property will not always yield results identical to the Java Class.getName method:
+		assertEquals("org.apache.lucene.analysis.util.RollingCharBuffer", rl.newInstance("org.apache.lucene.analysis.util.RollingCharBuffer", typeof(object)).GetType().FullName);
+		// theoretically classes should also be loadable:
+		// NOTE(review): "java/lang/String.class" is a Java classpath resource; presumably it cannot be
+		// resolved on .NET - confirm what the ported loader is expected to do here.
+		IOUtils.closeWhileHandlingException(rl.openResource("java/lang/String.class"));
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBaseDir() throws Exception
+	  /// <summary>
+	  /// Writes "template.txt" into a temporary base directory and checks that a
+	  /// <c>FilesystemResourceLoader</c> finds it by relative name and by full path when constructed with
+	  /// the base directory, and by full path when constructed without one. The temporary directory is
+	  /// removed afterwards.
+	  /// </summary>
+	  public virtual void testBaseDir()
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final java.io.File base = createTempDir("fsResourceLoaderBase").getAbsoluteFile();
+		File @base = createTempDir("fsResourceLoaderBase").AbsoluteFile;
+		try
+		{
+		  @base.mkdirs();
+		  // NOTE(review): File, Writer, mkdirs and write are still the Java API shapes, and FileStream
+		  // is called with (directory, name, ...) arguments - confirm the intended .NET equivalents.
+		  Writer os = new System.IO.StreamWriter(new System.IO.FileStream(@base, "template.txt", System.IO.FileMode.Create, System.IO.FileAccess.Write), Encoding.UTF8);
+		  try
+		  {
+			os.write("foobar\n");
+		  }
+		  finally
+		  {
+			// close the writer even if the write failed
+			IOUtils.closeWhileHandlingException(os);
+		  }
+
+		  // loader rooted at the temporary base directory
+		  ResourceLoader rl = new FilesystemResourceLoader(@base);
+		  assertEquals("foobar", WordlistLoader.getLines(rl.openResource("template.txt"), StandardCharsets.UTF_8).get(0));
+		  // Same with full path name:
+		  string fullPath = (new File(@base, "template.txt")).ToString();
+		  assertEquals("foobar", WordlistLoader.getLines(rl.openResource(fullPath), StandardCharsets.UTF_8).get(0));
+		  assertClasspathDelegation(rl);
+		  assertNotFound(rl);
+
+		  // now use RL without base dir:
+		  rl = new FilesystemResourceLoader();
+		  assertEquals("foobar", WordlistLoader.getLines(rl.openResource((new File(@base, "template.txt")).ToString()), StandardCharsets.UTF_8).get(0));
+		  assertClasspathDelegation(rl);
+		  assertNotFound(rl);
+		}
+		finally
+		{
+		  // always remove the temporary directory
+		  TestUtil.rm(@base);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testDelegation() throws Exception
+	  /// <summary>
+	  /// Checks that a <c>FilesystemResourceLoader</c> built with a null base directory and a
+	  /// <c>StringMockResourceLoader</c> returns the mock's content for "template.txt".
+	  /// </summary>
+	  public virtual void testDelegation()
+	  {
+		StringMockResourceLoader fallback = new StringMockResourceLoader("foobar\n");
+		ResourceLoader rl = new FilesystemResourceLoader(null, fallback);
+		var resource = rl.openResource("template.txt");
+		var lines = WordlistLoader.getLines(resource, StandardCharsets.UTF_8);
+		assertEquals("foobar", lines.get(0));
+	  }
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestRollingCharBuffer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestRollingCharBuffer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestRollingCharBuffer.cs
new file mode 100644
index 0000000..5e9b2b6
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestRollingCharBuffer.cs
@@ -0,0 +1,136 @@
+using System;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LuceneTestCase = org.apache.lucene.util.LuceneTestCase;
+	using TestUtil = org.apache.lucene.util.TestUtil;
+
+	/// <summary>
+	/// Randomized test for <c>RollingCharBuffer</c>: reads single chars and slices from a buffer over a
+	/// random string, occasionally freeing a prefix, and compares every read with the source string.
+	/// </summary>
+	public class TestRollingCharBuffer : LuceneTestCase
+	{
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void test() throws Exception
+	  /// <summary>
+	  /// Runs at least 1000 iterations; each resets the buffer on a new random string and performs a
+	  /// random mix of "read next char", "re-read an available char", "read a slice" and "free" operations.
+	  /// </summary>
+	  public virtual void test()
+	  {
+		int ITERS = atLeast(1000);
+
+		RollingCharBuffer buffer = new RollingCharBuffer();
+
+		// Must not be named 'random': a local of that name hides the inherited random() method,
+		// including inside its own initializer.
+		Random rnd = random();
+		for (int iter = 0;iter < ITERS;iter++)
+		{
+		  int stringLen = rnd.nextBoolean() ? rnd.Next(50) : rnd.Next(20000);
+		  string s;
+		  if (stringLen == 0)
+		  {
+			s = "";
+		  }
+		  else
+		  {
+			s = TestUtil.randomUnicodeString(rnd, stringLen);
+		  }
+		  if (VERBOSE)
+		  {
+			Console.WriteLine("\nTEST: iter=" + iter + " s.length()=" + s.Length);
+		  }
+		  buffer.reset(new StringReader(s));
+		  // nextRead: index of the next char not yet read; availCount: chars read and not yet freed.
+		  int nextRead = 0;
+		  int availCount = 0;
+		  while (nextRead < s.Length)
+		  {
+			if (VERBOSE)
+			{
+			  Console.WriteLine("  cycle nextRead=" + nextRead + " avail=" + availCount);
+			}
+			if (availCount == 0 || rnd.nextBoolean())
+			{
+			  // Read next char
+			  if (VERBOSE)
+			  {
+				Console.WriteLine("    new char");
+			  }
+			  assertEquals(s[nextRead], buffer.get(nextRead));
+			  nextRead++;
+			  availCount++;
+			}
+			else if (rnd.nextBoolean())
+			{
+			  // Read previous char
+			  int pos = TestUtil.Next(rnd, nextRead - availCount, nextRead - 1);
+			  if (VERBOSE)
+			  {
+				Console.WriteLine("    old char pos=" + pos);
+			  }
+			  assertEquals(s[pos], buffer.get(pos));
+			}
+			else
+			{
+			  // Read slice
+			  int length;
+			  if (availCount == 1)
+			  {
+				length = 1;
+			  }
+			  else
+			  {
+				length = TestUtil.Next(rnd, 1, availCount);
+			  }
+			  int start;
+			  if (length == availCount)
+			  {
+				start = nextRead - availCount;
+			  }
+			  else
+			  {
+				start = nextRead - availCount + rnd.Next(availCount - length);
+			  }
+			  if (VERBOSE)
+			  {
+				Console.WriteLine("    slice start=" + start + " length=" + length);
+			  }
+			  assertEquals(s.Substring(start, length), new string(buffer.get(start, length)));
+			}
+
+			// Roughly one cycle in twenty: free a random number of the oldest available chars.
+			if (availCount > 0 && rnd.Next(20) == 17)
+			{
+			  int toFree = rnd.Next(availCount);
+			  if (VERBOSE)
+			  {
+				Console.WriteLine("    free " + toFree + " (avail=" + (availCount - toFree) + ")");
+			  }
+			  buffer.freeBefore(nextRead - (availCount - toFree));
+			  availCount -= toFree;
+			}
+		  }
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestSegmentingTokenizerBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestSegmentingTokenizerBase.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestSegmentingTokenizerBase.cs
new file mode 100644
index 0000000..0ea4c96
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestSegmentingTokenizerBase.cs
@@ -0,0 +1,251 @@
+using System.Text;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+
+	/// <summary>
+	/// Basic tests for <see cref="SegmentingTokenizerBase"/>, exercised through the two
+	/// small tokenizer subclasses declared at the bottom of this class.
+	/// </summary>
+	public class TestSegmentingTokenizerBase : BaseTokenStreamTestCase
+	{
+	  /// <summary>
+	  /// Analyzer whose only component is a <see cref="WholeSentenceTokenizer"/>. </summary>
+	  private readonly Analyzer sentence = new AnalyzerAnonymousInnerClassHelper();
+
+	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
+	  {
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			return new TokenStreamComponents(new WholeSentenceTokenizer(reader));
+		  }
+	  }
+
+	  /// <summary>
+	  /// Analyzer whose only component is a <see cref="SentenceAndWordTokenizer"/>. </summary>
+	  private readonly Analyzer sentenceAndWord = new AnalyzerAnonymousInnerClassHelper2();
+
+	  private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+	  {
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			return new TokenStreamComponents(new SentenceAndWordTokenizer(reader));
+		  }
+	  }
+
+	  /// <summary>
+	  /// Some simple examples, just outputting the whole sentence boundaries as "terms" </summary>
+	  public virtual void testBasics()
+	  {
+		// The abbreviation "U.S." must not be treated as the end of a sentence.
+		assertAnalyzesTo(sentence, "The acronym for United States is U.S. but this doesn't end a sentence", new string[] {"The acronym for United States is U.S. but this doesn't end a sentence"});
+		// Two sentences; the first expected term keeps its trailing space.
+		assertAnalyzesTo(sentence, "He said, \"Are you going?\" John shook his head.", new string[] {"He said, \"Are you going?\" ", "John shook his head."});
+	  }
+
+	  /// <summary>
+	  /// Test a subclass that sets some custom attribute values </summary>
+	  public virtual void testCustomAttributes()
+	  {
+		// The last array holds a 2 for "John", the first word of the second sentence;
+		// every other word is expected to carry 1.
+		assertAnalyzesTo(sentenceAndWord, "He said, \"Are you going?\" John shook his head.", new string[] {"He", "said", "Are", "you", "going", "John", "shook", "his", "head"}, new int[] {0, 3, 10, 14, 18, 26, 31, 37, 41}, new int[] {2, 7, 13, 17, 23, 30, 36, 40, 45}, new int[] {1, 1, 1, 1, 1, 2, 1, 1, 1});
+	  }
+
+	  /// <summary>
+	  /// Tests tokenstream reuse </summary>
+	  public virtual void testReuse()
+	  {
+		// Same analyzer, two inputs in a row: the second run must start again from
+		// offset 0 with an increment of 1 on its first word.
+		assertAnalyzesTo(sentenceAndWord, "He said, \"Are you going?\"", new string[] {"He", "said", "Are", "you", "going"}, new int[] {0, 3, 10, 14, 18}, new int[] {2, 7, 13, 17, 23}, new int[] {1, 1, 1, 1, 1});
+		assertAnalyzesTo(sentenceAndWord, "John shook his head.", new string[] {"John", "shook", "his", "head"}, new int[] {0, 5, 11, 15}, new int[] {4, 10, 14, 19}, new int[] {1, 1, 1, 1});
+	  }
+
+	  /// <summary>
+	  /// Tests TokenStream.end() </summary>
+	  public virtual void testEnd()
+	  {
+		// BaseTokenStreamTestCase asserts that end() is set to our StringReader's length for us here.
+		// we add some junk whitespace to the end just to test it.
+		assertAnalyzesTo(sentenceAndWord, "John shook his head          ", new string[] {"John", "shook", "his", "head"});
+		assertAnalyzesTo(sentenceAndWord, "John shook his head.          ", new string[] {"John", "shook", "his", "head"});
+	  }
+
+	  /// <summary>
+	  /// Tests terms which span across boundaries </summary>
+	  public virtual void testHugeDoc()
+	  {
+		StringBuilder sb = new StringBuilder();
+		// 4094 newlines ahead of the text, so that "testing 1234" starts late in the input.
+		// NOTE(review): presumably chosen so the words straddle an internal buffer refill - confirm against SegmentingTokenizerBase.
+		sb.Append('\n', 4094);
+		sb.Append("testing 1234");
+		string input = sb.ToString();
+		assertAnalyzesTo(sentenceAndWord, input, new string[] {"testing", "1234"});
+	  }
+
+	  /// <summary>
+	  /// Feeds a single run of 10240 'a' characters and expects it to come back
+	  /// as ten terms of 1024 characters each. </summary>
+	  public virtual void testHugeTerm()
+	  {
+		string input = new string('a', 10240);
+		string expectedToken = new string('a', 1024);
+		// 10240 / 1024 == 10 identical chunks.
+		string[] expected = new string[10];
+		for (int i = 0; i < expected.Length; i++)
+		{
+		  expected[i] = expectedToken;
+		}
+		assertAnalyzesTo(sentence, input, expected);
+	  }
+
+	  /// <summary>
+	  /// blast some random strings through the analyzer </summary>
+	  public virtual void testRandomStrings()
+	  {
+		checkRandomData(random(), sentence, 10000 * RANDOM_MULTIPLIER);
+		checkRandomData(random(), sentenceAndWord, 10000 * RANDOM_MULTIPLIER);
+	  }
+
+	  // some tokenizers for testing
+
+	  /// <summary>
+	  /// silly tokenizer that just returns whole sentences as tokens </summary>
+	  internal class WholeSentenceTokenizer : SegmentingTokenizerBase
+	  {
+		internal int sentenceStart, sentenceEnd;
+		internal bool hasSentence;
+
+		internal CharTermAttribute termAtt;
+		internal OffsetAttribute offsetAtt;
+
+		public WholeSentenceTokenizer(Reader reader) : base(reader, BreakIterator.getSentenceInstance(Locale.ROOT))
+		{
+		  // Attributes are registered here, not in field initializers: a C# field
+		  // initializer may not call an instance method such as addAttribute.
+		  termAtt = addAttribute(typeof(CharTermAttribute));
+		  offsetAtt = addAttribute(typeof(OffsetAttribute));
+		}
+
+		/// <summary>
+		/// Records the bounds of the next sentence and marks it as not yet emitted. </summary>
+		protected internal override void setNextSentence(int sentenceStart, int sentenceEnd)
+		{
+		  this.sentenceStart = sentenceStart;
+		  this.sentenceEnd = sentenceEnd;
+		  hasSentence = true;
+		}
+
+		/// <summary>
+		/// Emits the pending sentence as one term; returns false once it has been emitted. </summary>
+		protected internal override bool incrementWord()
+		{
+		  if (!hasSentence)
+		  {
+			return false;
+		  }
+
+		  hasSentence = false;
+		  clearAttributes();
+		  termAtt.copyBuffer(buffer, sentenceStart, sentenceEnd - sentenceStart);
+		  // sentence bounds are relative to buffer; adding offset gives the position passed to correctOffset
+		  offsetAtt.setOffset(correctOffset(offset + sentenceStart), correctOffset(offset + sentenceEnd));
+		  return true;
+		}
+	  }
+
+	  /// <summary>
+	  /// simple tokenizer, that bumps posinc + 1 for tokens after a 
+	  /// sentence boundary to inhibit phrase queries without slop.
+	  /// </summary>
+	  internal class SentenceAndWordTokenizer : SegmentingTokenizerBase
+	  {
+		internal int sentenceStart, sentenceEnd;
+		internal int wordStart, wordEnd;
+		internal int posBoost = -1; // initially set to -1 so the first word in the document doesn't get a pos boost
+
+		internal CharTermAttribute termAtt;
+		internal OffsetAttribute offsetAtt;
+		internal PositionIncrementAttribute posIncAtt;
+
+		public SentenceAndWordTokenizer(Reader reader) : base(reader, BreakIterator.getSentenceInstance(Locale.ROOT))
+		{
+		  // Attributes are registered here, not in field initializers: a C# field
+		  // initializer may not call an instance method such as addAttribute.
+		  termAtt = addAttribute(typeof(CharTermAttribute));
+		  offsetAtt = addAttribute(typeof(OffsetAttribute));
+		  posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
+		}
+
+		/// <summary>
+		/// Positions the word cursor at the start of the new sentence and bumps the pending boost. </summary>
+		protected internal override void setNextSentence(int sentenceStart, int sentenceEnd)
+		{
+		  this.wordStart = this.wordEnd = this.sentenceStart = sentenceStart;
+		  this.sentenceEnd = sentenceEnd;
+		  posBoost++;
+		}
+
+		/// <summary>
+		/// Resets the base tokenizer and restores the initial boost of -1. </summary>
+		public override void reset()
+		{
+		  base.reset();
+		  posBoost = -1;
+		}
+
+		/// <summary>
+		/// Emits the next run of letter-or-digit characters in the current sentence;
+		/// returns false when the sentence has no more such runs. </summary>
+		protected internal override bool incrementWord()
+		{
+		  // skip everything that is not a letter or digit, starting after the previous word
+		  wordStart = wordEnd;
+		  while (wordStart < sentenceEnd && !char.IsLetterOrDigit(buffer[wordStart]))
+		  {
+			wordStart++;
+		  }
+
+		  if (wordStart == sentenceEnd)
+		  {
+			  return false;
+		  }
+
+		  // extend the word to the end of the letter-or-digit run
+		  wordEnd = wordStart + 1;
+		  while (wordEnd < sentenceEnd && char.IsLetterOrDigit(buffer[wordEnd]))
+		  {
+			wordEnd++;
+		  }
+
+		  clearAttributes();
+		  termAtt.copyBuffer(buffer, wordStart, wordEnd - wordStart);
+		  offsetAtt.setOffset(correctOffset(offset + wordStart), correctOffset(offset + wordEnd));
+		  // the boost applies only to the first word after setNextSentence; it is cleared right after
+		  posIncAtt.PositionIncrement = posIncAtt.PositionIncrement + posBoost;
+		  posBoost = 0;
+		  return true;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestWordlistLoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestWordlistLoader.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestWordlistLoader.cs
new file mode 100644
index 0000000..a3e5235
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestWordlistLoader.cs
@@ -0,0 +1,79 @@
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LuceneTestCase = org.apache.lucene.util.LuceneTestCase;
+
+	/// <summary>
+	/// Tests for the word-set loading methods of WordlistLoader: plain lists,
+	/// lists with a comment prefix, and lists in snowball format.
+	/// </summary>
+	public class TestWordlistLoader : LuceneTestCase
+	{
+
+	  /// <summary>
+	  /// Loads the same three-word list through two different reader types
+	  /// and checks both resulting sets. </summary>
+	  public virtual void testWordlistLoading()
+	  {
+		string s = "ONE\n  two \nthree";
+		CharArraySet wordSet1 = WordlistLoader.getWordSet(new StringReader(s), TEST_VERSION_CURRENT);
+		checkSet(wordSet1);
+		// StreamReader has no constructor that accepts another reader, so the second
+		// reader is built over an in-memory stream holding the same text.
+		byte[] encoded = System.Text.Encoding.UTF8.GetBytes(s);
+		CharArraySet wordSet2 = WordlistLoader.getWordSet(new System.IO.StreamReader(new System.IO.MemoryStream(encoded)), TEST_VERSION_CURRENT);
+		checkSet(wordSet2);
+	  }
+
+	  /// <summary>
+	  /// Loads a list with "#" passed as the comment argument and checks that the
+	  /// "#comment" line contributes no entry. </summary>
+	  public virtual void testComments()
+	  {
+		string s = "ONE\n  two \nthree\n#comment";
+		CharArraySet wordSet1 = WordlistLoader.getWordSet(new StringReader(s), "#", TEST_VERSION_CURRENT);
+		checkSet(wordSet1);
+		assertFalse(wordSet1.contains("#comment"));
+		assertFalse(wordSet1.contains("comment"));
+	  }
+
+
+	  /// <summary>
+	  /// Asserts that the set holds exactly "ONE", "two" and "three". </summary>
+	  private void checkSet(CharArraySet wordset)
+	  {
+		assertEquals(3, wordset.size());
+		assertTrue(wordset.contains("ONE")); // case is not modified
+		assertTrue(wordset.contains("two")); // surrounding whitespace is removed
+		assertTrue(wordset.contains("three"));
+		assertFalse(wordset.contains("four"));
+	  }
+
+	  /// <summary>
+	  /// Test stopwords in snowball format
+	  /// </summary>
+	  public virtual void testSnowballListLoading()
+	  {
+		string s = "|comment\n" + // commented line
+			" |comment\n" + // commented line with leading whitespace
+			"\n" + // blank line
+			"  \t\n" + // line with only whitespace
+			" |comment | comment\n" + // commented line with comment
+			"ONE\n" + // stopword, in uppercase
+			"   two   \n" + // stopword with leading/trailing space
+			" three   four five \n" + // multiple stopwords
+			"six seven | comment\n"; // multiple stopwords + comment
+		CharArraySet wordset = WordlistLoader.getSnowballWordSet(new StringReader(s), TEST_VERSION_CURRENT);
+		assertEquals(7, wordset.size());
+		assertTrue(wordset.contains("ONE"));
+		assertTrue(wordset.contains("two"));
+		assertTrue(wordset.contains("three"));
+		assertTrue(wordset.contains("four"));
+		assertTrue(wordset.contains("five"));
+		assertTrue(wordset.contains("six"));
+		assertTrue(wordset.contains("seven"));
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Wikipedia/TestWikipediaTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Wikipedia/TestWikipediaTokenizerFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Wikipedia/TestWikipediaTokenizerFactory.cs
new file mode 100644
index 0000000..c87a2e5
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Wikipedia/TestWikipediaTokenizerFactory.cs
@@ -0,0 +1,56 @@
+namespace org.apache.lucene.analysis.wikipedia
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+	/// <summary>
+	/// Simple tests to ensure the wikipedia tokenizer is working.
+	/// </summary>
+	public class TestWikipediaTokenizerFactory : BaseTokenStreamFactoryTestCase
+	{
+	  /// <summary>
+	  /// Creates a tokenizer through the "Wikipedia" factory and checks the terms,
+	  /// offsets, types and position increments produced for a short input that
+	  /// ends in a category link. </summary>
+	  public virtual void testTokenizer()
+	  {
+		Reader reader = new StringReader("This is a [[Category:foo]]");
+		Tokenizer tokenizer = tokenizerFactory("Wikipedia").create(reader);
+		// "foo" is expected at offsets 21-24 with the CATEGORY type.
+		assertTokenStreamContents(tokenizer, new string[] {"This", "is", "a", "foo"}, new int[] {0, 5, 8, 21}, new int[] {4, 7, 9, 24}, new string[] {"<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", WikipediaTokenizer.CATEGORY}, new int[] {1, 1, 1, 1});
+	  }
+
+	  /// <summary>
+	  /// Test that bogus arguments result in exception </summary>
+	  public virtual void testBogusArguments()
+	  {
+		try
+		{
+		  tokenizerFactory("Wikipedia", "bogusArg", "bogusValue");
+		  fail();
+		}
+		catch (System.ArgumentException expected)
+		{
+		  // System.String exposes Contains (capital C); the lowercase form is the Java name.
+		  assertTrue(expected.Message.Contains("Unknown parameters"));
+		}
+	  }
+	}
+
+}
\ No newline at end of file


Mime
View raw message