lucenenet-commits mailing list archives

From synhers...@apache.org
Subject [23/27] lucenenet git commit: adding converted analysis common tests
Date Thu, 10 Dec 2015 18:39:12 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/CommonGramsFilterTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/CommonGramsFilterTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/CommonGramsFilterTest.cs
new file mode 100644
index 0000000..fcc15c5
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/CommonGramsFilterTest.cs
@@ -0,0 +1,346 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+namespace org.apache.lucene.analysis.commongrams
+{
+
+
+	using org.apache.lucene.analysis;
+	using WhitespaceTokenizer = org.apache.lucene.analysis.core.WhitespaceTokenizer;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+	/// <summary>
+	/// Tests CommonGrams(Query)Filter
+	/// </summary>
+	public class CommonGramsFilterTest : BaseTokenStreamTestCase
+	{
+	  private static readonly CharArraySet commonWords = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList("s", "a", "b", "c", "d", "the", "of"), false);
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testReset() throws Exception
+	  public virtual void testReset()
+	  {
+		const string input = "How the s a brown s cow d like A B thing?";
+		WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+		CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
+
+		CharTermAttribute term = cgf.addAttribute(typeof(CharTermAttribute));
+		cgf.reset();
+		assertTrue(cgf.incrementToken());
+		assertEquals("How", term.ToString());
+		assertTrue(cgf.incrementToken());
+		assertEquals("How_the", term.ToString());
+		assertTrue(cgf.incrementToken());
+		assertEquals("the", term.ToString());
+		assertTrue(cgf.incrementToken());
+		assertEquals("the_s", term.ToString());
+		cgf.close();
+
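+		// reuse the same chain: give the tokenizer a fresh Reader, then reset() before consuming again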
+		wt.Reader = new StringReader(input);
+		cgf.reset();
+		assertTrue(cgf.incrementToken());
+		assertEquals("How", term.ToString());
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testQueryReset() throws Exception
+	  public virtual void testQueryReset()
+	  {
+		const string input = "How the s a brown s cow d like A B thing?";
+		WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+		CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
+		CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf);
+
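+		// attributes are shared along the chain, so the term attribute added on the tokenizer reflects the query filter's output below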
+		CharTermAttribute term = wt.addAttribute(typeof(CharTermAttribute));
+		nsf.reset();
+		assertTrue(nsf.incrementToken());
+		assertEquals("How_the", term.ToString());
+		assertTrue(nsf.incrementToken());
+		assertEquals("the_s", term.ToString());
+		nsf.close();
+
+		wt.Reader = new StringReader(input);
+		nsf.reset();
+		assertTrue(nsf.incrementToken());
+		assertEquals("How_the", term.ToString());
+	  }
+
+	  /// <summary>
+	  /// This is for testing CommonGramsQueryFilter, which outputs a set of tokens
+	  /// optimized for querying, with only one token at each position: either a
+	  /// unigram or a bigram. It also will not return a token for the final position
+	  /// if the final word is already in the preceding bigram. Example (three
+	  /// tokens/positions in):
+	  /// "foo bar the" => "foo:1|bar:2,bar-the:2|the:3" => "foo" "bar-the" (2 tokens out)
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testCommonGramsQueryFilter() throws Exception
+	  public virtual void testCommonGramsQueryFilter()
+	  {
+		Analyzer a = new AnalyzerAnonymousInnerClassHelper(this);
+
+		// Stop words used below are "of" "the" and "s"
+
+		// two word queries
+		assertAnalyzesTo(a, "brown fox", new string[] {"brown", "fox"});
+		assertAnalyzesTo(a, "the fox", new string[] {"the_fox"});
+		assertAnalyzesTo(a, "fox of", new string[] {"fox_of"});
+		assertAnalyzesTo(a, "of the", new string[] {"of_the"});
+
+		// one word queries
+		assertAnalyzesTo(a, "the", new string[] {"the"});
+		assertAnalyzesTo(a, "foo", new string[] {"foo"});
+
+		// 3 word combinations s=stopword/common word n=not a stop word
+		assertAnalyzesTo(a, "n n n", new string[] {"n", "n", "n"});
+		assertAnalyzesTo(a, "quick brown fox", new string[] {"quick", "brown", "fox"});
+
+		assertAnalyzesTo(a, "n n s", new string[] {"n", "n_s"});
+		assertAnalyzesTo(a, "quick brown the", new string[] {"quick", "brown_the"});
+
+		assertAnalyzesTo(a, "n s n", new string[] {"n_s", "s_n"});
+		assertAnalyzesTo(a, "quick the brown", new string[] {"quick_the", "the_brown"});
+
+		assertAnalyzesTo(a, "n s s", new string[] {"n_s", "s_s"});
+		assertAnalyzesTo(a, "fox of the", new string[] {"fox_of", "of_the"});
+
+		assertAnalyzesTo(a, "s n n", new string[] {"s_n", "n", "n"});
+		assertAnalyzesTo(a, "the quick brown", new string[] {"the_quick", "quick", "brown"});
+
+		assertAnalyzesTo(a, "s n s", new string[] {"s_n", "n_s"});
+		assertAnalyzesTo(a, "the fox of", new string[] {"the_fox", "fox_of"});
+
+		assertAnalyzesTo(a, "s s n", new string[] {"s_s", "s_n"});
+		assertAnalyzesTo(a, "of the fox", new string[] {"of_the", "the_fox"});
+
+		assertAnalyzesTo(a, "s s s", new string[] {"s_s", "s_s"});
+		assertAnalyzesTo(a, "of the of", new string[] {"of_the", "the_of"});
+	  }
+
+	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
+	  {
+		  private readonly CommonGramsFilterTest outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper(CommonGramsFilterTest outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  public override TokenStreamComponents createComponents(string field, Reader @in)
+		  {
+			Tokenizer tokenizer = new MockTokenizer(@in, MockTokenizer.WHITESPACE, false);
+			return new TokenStreamComponents(tokenizer, new CommonGramsQueryFilter(new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords)));
+		  }
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testCommonGramsFilter() throws Exception
+	  public virtual void testCommonGramsFilter()
+	  {
+		Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this);
+
+		// Stop words used below are "of" "the" and "s"
+		// one word queries
+		assertAnalyzesTo(a, "the", new string[] {"the"});
+		assertAnalyzesTo(a, "foo", new string[] {"foo"});
+
+		// two word queries
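+		// the int[] arguments below are expected position increments: 0 means the bigram is stacked at the same position as the preceding unigram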
+		assertAnalyzesTo(a, "brown fox", new string[] {"brown", "fox"}, new int[] {1, 1});
+		assertAnalyzesTo(a, "the fox", new string[] {"the", "the_fox", "fox"}, new int[] {1, 0, 1});
+		assertAnalyzesTo(a, "fox of", new string[] {"fox", "fox_of", "of"}, new int[] {1, 0, 1});
+		assertAnalyzesTo(a, "of the", new string[] {"of", "of_the", "the"}, new int[] {1, 0, 1});
+
+		// 3 word combinations s=stopword/common word n=not a stop word
+		assertAnalyzesTo(a, "n n n", new string[] {"n", "n", "n"}, new int[] {1, 1, 1});
+		assertAnalyzesTo(a, "quick brown fox", new string[] {"quick", "brown", "fox"}, new int[] {1, 1, 1});
+
+		assertAnalyzesTo(a, "n n s", new string[] {"n", "n", "n_s", "s"}, new int[] {1, 1, 0, 1});
+		assertAnalyzesTo(a, "quick brown the", new string[] {"quick", "brown", "brown_the", "the"}, new int[] {1, 1, 0, 1});
+
+		assertAnalyzesTo(a, "n s n", new string[] {"n", "n_s", "s", "s_n", "n"}, new int[] {1, 0, 1, 0, 1});
+		assertAnalyzesTo(a, "quick the fox", new string[] {"quick", "quick_the", "the", "the_fox", "fox"}, new int[] {1, 0, 1, 0, 1});
+
+		assertAnalyzesTo(a, "n s s", new string[] {"n", "n_s", "s", "s_s", "s"}, new int[] {1, 0, 1, 0, 1});
+		assertAnalyzesTo(a, "fox of the", new string[] {"fox", "fox_of", "of", "of_the", "the"}, new int[] {1, 0, 1, 0, 1});
+
+		assertAnalyzesTo(a, "s n n", new string[] {"s", "s_n", "n", "n"}, new int[] {1, 0, 1, 1});
+		assertAnalyzesTo(a, "the quick brown", new string[] {"the", "the_quick", "quick", "brown"}, new int[] {1, 0, 1, 1});
+
+		assertAnalyzesTo(a, "s n s", new string[] {"s", "s_n", "n", "n_s", "s"}, new int[] {1, 0, 1, 0, 1});
+		assertAnalyzesTo(a, "the fox of", new string[] {"the", "the_fox", "fox", "fox_of", "of"}, new int[] {1, 0, 1, 0, 1});
+
+		assertAnalyzesTo(a, "s s n", new string[] {"s", "s_s", "s", "s_n", "n"}, new int[] {1, 0, 1, 0, 1});
+		assertAnalyzesTo(a, "of the fox", new string[] {"of", "of_the", "the", "the_fox", "fox"}, new int[] {1, 0, 1, 0, 1});
+
+		assertAnalyzesTo(a, "s s s", new string[] {"s", "s_s", "s", "s_s", "s"}, new int[] {1, 0, 1, 0, 1});
+		assertAnalyzesTo(a, "of the of", new string[] {"of", "of_the", "the", "the_of", "of"}, new int[] {1, 0, 1, 0, 1});
+	  }
+
+	  private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+	  {
+		  private readonly CommonGramsFilterTest outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper2(CommonGramsFilterTest outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  public override TokenStreamComponents createComponents(string field, Reader @in)
+		  {
+			Tokenizer tokenizer = new MockTokenizer(@in, MockTokenizer.WHITESPACE, false);
+			return new TokenStreamComponents(tokenizer, new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords));
+		  }
+	  }
+
+	  /// <summary>
+	  /// Test that CommonGramsFilter respects case (the common-words set is built with ignoreCase=false)
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testCaseSensitive() throws Exception
+	  public virtual void testCaseSensitive()
+	  {
+		const string input = "How The s a brown s cow d like A B thing?";
+		MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
+		TokenFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
+		assertTokenStreamContents(cgf, new string[] {"How", "The", "The_s", "s", "s_a", "a", "a_brown", "brown", "brown_s", "s", "s_cow", "cow", "cow_d", "d", "d_like", "like", "A", "B", "thing?"});
+	  }
+
+	  /// <summary>
+	  /// Test CommonGramsQueryFilter in the case that the last word is a stopword
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLastWordisStopWord() throws Exception
+	  public virtual void testLastWordisStopWord()
+	  {
+		const string input = "dog the";
+		MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
+		CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
+		TokenFilter nsf = new CommonGramsQueryFilter(cgf);
+		assertTokenStreamContents(nsf, new string[] {"dog_the"});
+	  }
+
+	  /// <summary>
+	  /// Test CommonGramsQueryFilter in the case that the first word is a stopword
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testFirstWordisStopWord() throws Exception
+	  public virtual void testFirstWordisStopWord()
+	  {
+		const string input = "the dog";
+		MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
+		CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
+		TokenFilter nsf = new CommonGramsQueryFilter(cgf);
+		assertTokenStreamContents(nsf, new string[] {"the_dog"});
+	  }
+
+	  /// <summary>
+	  /// Test CommonGramsQueryFilter in the case of a single (stop)word query
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testOneWordQueryStopWord() throws Exception
+	  public virtual void testOneWordQueryStopWord()
+	  {
+		const string input = "the";
+		MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
+		CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
+		TokenFilter nsf = new CommonGramsQueryFilter(cgf);
+		assertTokenStreamContents(nsf, new string[] {"the"});
+	  }
+
+	  /// <summary>
+	  /// Test CommonGramsQueryFilter in the case of a single word query
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testOneWordQuery() throws Exception
+	  public virtual void testOneWordQuery()
+	  {
+		const string input = "monster";
+		MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
+		CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
+		TokenFilter nsf = new CommonGramsQueryFilter(cgf);
+		assertTokenStreamContents(nsf, new string[] {"monster"});
+	  }
+
+	  /// <summary>
+	  /// Test CommonGramsQueryFilter when first and last words are stopwords.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void TestFirstAndLastStopWord() throws Exception
+	  public virtual void TestFirstAndLastStopWord()
+	  {
+		const string input = "the of";
+		MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
+		CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
+		TokenFilter nsf = new CommonGramsQueryFilter(cgf);
+		assertTokenStreamContents(nsf, new string[] {"the_of"});
+	  }
+
+	  /// <summary>
+	  /// blast some random strings through the analyzer </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomStrings() throws Exception
+	  public virtual void testRandomStrings()
+	  {
+		Analyzer a = new AnalyzerAnonymousInnerClassHelper3(this);
+
+		checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
+
+		Analyzer b = new AnalyzerAnonymousInnerClassHelper4(this);
+
+		checkRandomData(random(), b, 1000 * RANDOM_MULTIPLIER);
+	  }
+
+	  private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+	  {
+		  private readonly CommonGramsFilterTest outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper3(CommonGramsFilterTest outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer t = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+			CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, t, commonWords);
+			return new TokenStreamComponents(t, cgf);
+		  }
+	  }
+
+	  private class AnalyzerAnonymousInnerClassHelper4 : Analyzer
+	  {
+		  private readonly CommonGramsFilterTest outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper4(CommonGramsFilterTest outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer t = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+			CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, t, commonWords);
+			return new TokenStreamComponents(t, new CommonGramsQueryFilter(cgf));
+		  }
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs
new file mode 100644
index 0000000..858f851
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs
@@ -0,0 +1,101 @@
+namespace org.apache.lucene.analysis.commongrams
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TestStopFilter = org.apache.lucene.analysis.core.TestStopFilter;
+	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using ClasspathResourceLoader = org.apache.lucene.analysis.util.ClasspathResourceLoader;
+	using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
+
+	/// <summary>
+	/// Tests pretty much copied from StopFilterFactoryTest. We use the test files
+	/// used by StopFilterFactoryTest. TODO: consider creating separate test files
+	/// so this won't break if the stop filter test files change.
+	/// 
+	/// </summary>
+	public class TestCommonGramsFilterFactory : BaseTokenStreamFactoryTestCase
+	{
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testInform() throws Exception
+	  public virtual void testInform()
+	  {
+		ResourceLoader loader = new ClasspathResourceLoader(typeof(TestStopFilter));
+		assertTrue("loader is null and it shouldn't be", loader != null);
+		CommonGramsFilterFactory factory = (CommonGramsFilterFactory) tokenFilterFactory("CommonGrams", TEST_VERSION_CURRENT, loader, "words", "stop-1.txt", "ignoreCase", "true");
+		CharArraySet words = factory.CommonWords;
+		assertTrue("words is null and it shouldn't be", words != null);
+		assertTrue("words Size: " + words.size() + " is not: " + 2, words.size() == 2);
+		assertTrue(factory.IgnoreCase + " does not equal: " + true, factory.IgnoreCase == true);
+
+		factory = (CommonGramsFilterFactory) tokenFilterFactory("CommonGrams", TEST_VERSION_CURRENT, loader, "words", "stop-1.txt, stop-2.txt", "ignoreCase", "true");
+		words = factory.CommonWords;
+		assertTrue("words is null and it shouldn't be", words != null);
+		assertTrue("words Size: " + words.size() + " is not: " + 4, words.size() == 4);
+		assertTrue(factory.IgnoreCase + " does not equal: " + true, factory.IgnoreCase == true);
+
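+		// "format"="snowball" reads the stopword file in Snowball format (one or more words per line, "|" comments)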
+		factory = (CommonGramsFilterFactory) tokenFilterFactory("CommonGrams", TEST_VERSION_CURRENT, loader, "words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true");
+		words = factory.CommonWords;
+		assertEquals(8, words.size());
+		assertTrue(words.contains("he"));
+		assertTrue(words.contains("him"));
+		assertTrue(words.contains("his"));
+		assertTrue(words.contains("himself"));
+		assertTrue(words.contains("she"));
+		assertTrue(words.contains("her"));
+		assertTrue(words.contains("hers"));
+		assertTrue(words.contains("herself"));
+	  }
+
+	  /// <summary>
+	  /// If no words are provided, then a set of English default stopwords is used.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testDefaults() throws Exception
+	  public virtual void testDefaults()
+	  {
+		CommonGramsFilterFactory factory = (CommonGramsFilterFactory) tokenFilterFactory("CommonGrams");
+		CharArraySet words = factory.CommonWords;
+		assertTrue("words is null and it shouldn't be", words != null);
+		assertTrue(words.contains("the"));
+		Tokenizer tokenizer = new MockTokenizer(new StringReader("testing the factory"), MockTokenizer.WHITESPACE, false);
+		TokenStream stream = factory.create(tokenizer);
+		assertTokenStreamContents(stream, new string[] {"testing", "testing_the", "the", "the_factory", "factory"});
+	  }
+
+	  /// <summary>
+	  /// Test that bogus arguments result in exception </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBogusArguments() throws Exception
+	  public virtual void testBogusArguments()
+	  {
+		try
+		{
+		  tokenFilterFactory("CommonGrams", "bogusArg", "bogusValue");
+		  fail();
+		}
+		catch (System.ArgumentException expected)
+		{
+		  assertTrue(expected.Message.contains("Unknown parameters"));
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsQueryFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsQueryFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsQueryFilterFactory.cs
new file mode 100644
index 0000000..64b4f93
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsQueryFilterFactory.cs
@@ -0,0 +1,101 @@
+namespace org.apache.lucene.analysis.commongrams
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TestStopFilter = org.apache.lucene.analysis.core.TestStopFilter;
+	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using ClasspathResourceLoader = org.apache.lucene.analysis.util.ClasspathResourceLoader;
+	using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
+
+	/// <summary>
+	/// Tests pretty much copied from StopFilterFactoryTest. We use the test files
+	/// used by StopFilterFactoryTest. TODO: consider creating separate test files
+	/// so this won't break if the stop filter test files change.
+	/// 
+	/// </summary>
+	public class TestCommonGramsQueryFilterFactory : BaseTokenStreamFactoryTestCase
+	{
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testInform() throws Exception
+	  public virtual void testInform()
+	  {
+		ResourceLoader loader = new ClasspathResourceLoader(typeof(TestStopFilter));
+		assertTrue("loader is null and it shouldn't be", loader != null);
+		CommonGramsQueryFilterFactory factory = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery", TEST_VERSION_CURRENT, loader, "words", "stop-1.txt", "ignoreCase", "true");
+		CharArraySet words = factory.CommonWords;
+		assertTrue("words is null and it shouldn't be", words != null);
+		assertTrue("words Size: " + words.size() + " is not: " + 2, words.size() == 2);
+		assertTrue(factory.IgnoreCase + " does not equal: " + true, factory.IgnoreCase == true);
+
+		factory = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery", TEST_VERSION_CURRENT, loader, "words", "stop-1.txt, stop-2.txt", "ignoreCase", "true");
+		words = factory.CommonWords;
+		assertTrue("words is null and it shouldn't be", words != null);
+		assertTrue("words Size: " + words.size() + " is not: " + 4, words.size() == 4);
+		assertTrue(factory.IgnoreCase + " does not equal: " + true, factory.IgnoreCase == true);
+
+		factory = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery", TEST_VERSION_CURRENT, loader, "words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true");
+		words = factory.CommonWords;
+		assertEquals(8, words.size());
+		assertTrue(words.contains("he"));
+		assertTrue(words.contains("him"));
+		assertTrue(words.contains("his"));
+		assertTrue(words.contains("himself"));
+		assertTrue(words.contains("she"));
+		assertTrue(words.contains("her"));
+		assertTrue(words.contains("hers"));
+		assertTrue(words.contains("herself"));
+	  }
+
+	  /// <summary>
+	  /// If no words are provided, then a set of English default stopwords is used.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testDefaults() throws Exception
+	  public virtual void testDefaults()
+	  {
+		CommonGramsQueryFilterFactory factory = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery");
+		CharArraySet words = factory.CommonWords;
+		assertTrue("words is null and it shouldn't be", words != null);
+		assertTrue(words.contains("the"));
+		Tokenizer tokenizer = new MockTokenizer(new StringReader("testing the factory"), MockTokenizer.WHITESPACE, false);
+		TokenStream stream = factory.create(tokenizer);
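+		// unlike CommonGramsFilterFactory, the query variant emits only one token per position, so the covered unigrams are dropped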
+		assertTokenStreamContents(stream, new string[] {"testing_the", "the_factory"});
+	  }
+
+	  /// <summary>
+	  /// Test that bogus arguments result in exception </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBogusArguments() throws Exception
+	  public virtual void testBogusArguments()
+	  {
+		try
+		{
+		  tokenFilterFactory("CommonGramsQuery", "bogusArg", "bogusValue");
+		  fail();
+		}
+		catch (System.ArgumentException expected)
+		{
+		  assertTrue(expected.Message.contains("Unknown parameters"));
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestCompoundWordTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestCompoundWordTokenFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestCompoundWordTokenFilter.cs
new file mode 100644
index 0000000..7aa8a77
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestCompoundWordTokenFilter.cs
@@ -0,0 +1,412 @@
+namespace org.apache.lucene.analysis.compound
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using MappingCharFilter = org.apache.lucene.analysis.charfilter.MappingCharFilter;
+	using NormalizeCharMap = org.apache.lucene.analysis.charfilter.NormalizeCharMap;
+	using HyphenationTree = org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
+	using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+	using WhitespaceTokenizer = org.apache.lucene.analysis.core.WhitespaceTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using Attribute = org.apache.lucene.util.Attribute;
+	using AttributeImpl = org.apache.lucene.util.AttributeImpl;
+	using InputSource = org.xml.sax.InputSource;
+
+	public class TestCompoundWordTokenFilter : BaseTokenStreamTestCase
+	{
+
+	  private static CharArraySet makeDictionary(params string[] dictionary)
+	  {
+		return new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(dictionary), true);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testHyphenationCompoundWordsDA() throws Exception
+	  public virtual void testHyphenationCompoundWordsDA()
+	  {
+		CharArraySet dict = makeDictionary("læse", "hest");
+
+		InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
+		HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is);
+
+		HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("min veninde som er lidt af en læsehest"), MockTokenizer.WHITESPACE, false), hyphenator, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
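+		// the dictionary subwords "læse" and "hest" are stacked at the position of "læsehest" (position increment 0 in the int[] below)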
+		assertTokenStreamContents(tf, new string[] {"min", "veninde", "som", "er", "lidt", "af", "en", "læsehest", "læse", "hest"}, new int[] {1, 1, 1, 1, 1, 1, 1, 1, 0, 0});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testHyphenationCompoundWordsDELongestMatch() throws Exception
+	  public virtual void testHyphenationCompoundWordsDELongestMatch()
+	  {
+		CharArraySet dict = makeDictionary("basketball", "basket", "ball", "kurv");
+
+		InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
+		HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is);
+
+		// the word basket will not be added due to the longest match option
+		HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, 40, true);
+		assertTokenStreamContents(tf, new string[] {"basketballkurv", "basketball", "ball", "kurv"}, new int[] {1, 0, 0, 0});
+
+	  }
+
+	  /// <summary>
+	  /// With hyphenation-only, you can get a lot of nonsense tokens.
+	  /// This can be controlled with the min/max subword size.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testHyphenationOnly() throws Exception
+	  public virtual void testHyphenationOnly()
+	  {
+		InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
+		HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is);
+
+		HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, 2, 4);
+
+		// min=2, max=4
+		assertTokenStreamContents(tf, new string[] {"basketballkurv", "ba", "sket", "bal", "ball", "kurv"});
+
+		tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, 4, 6);
+
+		// min=4, max=6
+		assertTokenStreamContents(tf, new string[] {"basketballkurv", "basket", "sket", "ball", "lkurv", "kurv"});
+
+		tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, 4, 10);
+
+		// min=4, max=10
+		assertTokenStreamContents(tf, new string[] {"basketballkurv", "basket", "basketbal", "basketball", "sket", "sketbal", "sketball", "ball", "ballkurv", "lkurv", "kurv"});
+
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testDumbCompoundWordsSE() throws Exception
+	  public virtual void testDumbCompoundWordsSE()
+	  {
+		CharArraySet dict = makeDictionary("Bil", "Dörr", "Motor", "Tak", "Borr", "Slag", "Hammar", "Pelar", "Glas", "Ögon", "Fodral", "Bas", "Fiol", "Makare", "Gesäll", "Sko", "Vind", "Rute", "Torkare", "Blad");
+
+		DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("Bildörr Bilmotor Biltak Slagborr Hammarborr Pelarborr Glasögonfodral Basfiolsfodral Basfiolsfodralmakaregesäll Skomakare Vindrutetorkare Vindrutetorkarblad abba"), MockTokenizer.WHITESPACE, false), dict);
+
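+		// expected arrays: tokens, start offsets, end offsets, position increments (decompounded parts get increment 0)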
+		assertTokenStreamContents(tf, new string[] {"Bildörr", "Bil", "dörr", "Bilmotor", "Bil", "motor", "Biltak", "Bil", "tak", "Slagborr", "Slag", "borr", "Hammarborr", "Hammar", "borr", "Pelarborr", "Pelar", "borr", "Glasögonfodral", "Glas", "ögon", "fodral", "Basfiolsfodral", "Bas", "fiol", "fodral", "Basfiolsfodralmakaregesäll", "Bas", "fiol", "fodral", "makare", "gesäll", "Skomakare", "Sko", "makare", "Vindrutetorkare", "Vind", "rute", "torkare", "Vindrutetorkarblad", "Vind", "rute", "blad", "abba"}, new int[] {0, 0, 0, 8, 8, 8, 17, 17, 17, 24, 24, 24, 33, 33, 33, 44, 44, 44, 54, 54, 54, 54, 69, 69, 69, 69, 84, 84, 84, 84, 84, 84, 111, 111, 111, 121, 121, 121, 121, 137, 137, 137, 137, 156}, new int[] {7, 7, 7, 16, 16, 16, 23, 23, 23, 32, 32, 32, 43, 43, 43, 53, 53, 53, 68, 68, 68, 68, 83, 83, 83, 83, 110, 110, 110, 110, 110, 110, 120, 120, 120, 136, 136, 136, 136, 155, 155, 155, 155, 160}, new int[] {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0
 , 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testDumbCompoundWordsSELongestMatch() throws Exception
+	  public virtual void testDumbCompoundWordsSELongestMatch()
+	  {
+		CharArraySet dict = makeDictionary("Bil", "Dörr", "Motor", "Tak", "Borr", "Slag", "Hammar", "Pelar", "Glas", "Ögon", "Fodral", "Bas", "Fiols", "Makare", "Gesäll", "Sko", "Vind", "Rute", "Torkare", "Blad", "Fiolsfodral");
+
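+		// onlyLongestMatch=true (last argument): "fiolsfodral" suppresses the shorter overlapping "fiols", while "fodral" is still reported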
+		DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("Basfiolsfodralmakaregesäll"), MockTokenizer.WHITESPACE, false), dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, true);
+
+		assertTokenStreamContents(tf, new string[] {"Basfiolsfodralmakaregesäll", "Bas", "fiolsfodral", "fodral", "makare", "gesäll"}, new int[] {0, 0, 0, 0, 0, 0}, new int[] {26, 26, 26, 26, 26, 26}, new int[] {1, 0, 0, 0, 0, 0});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testTokenEndingWithWordComponentOfMinimumLength() throws Exception
+	  public virtual void testTokenEndingWithWordComponentOfMinimumLength()
+	  {
+		CharArraySet dict = makeDictionary("ab", "cd", "ef");
+
+		DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcdef")
+		   ), dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
+
+		assertTokenStreamContents(tf, new string[] {"abcdef", "ab", "cd", "ef"}, new int[] {0, 0, 0, 0}, new int[] {6, 6, 6, 6}, new int[] {1, 0, 0, 0});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testWordComponentWithLessThanMinimumLength() throws Exception
+	  public virtual void testWordComponentWithLessThanMinimumLength()
+	  {
+		CharArraySet dict = makeDictionary("abc", "d", "efg");
+
+		DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcdefg")
+		   ), dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
+
+	  // since "d" is shorter than the minimum subword size, it should not be added to the token stream
+		assertTokenStreamContents(tf, new string[] {"abcdefg", "abc", "efg"}, new int[] {0, 0, 0}, new int[] {7, 7, 7}, new int[] {1, 0, 0});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testReset() throws Exception
+	  public virtual void testReset()
+	  {
+		CharArraySet dict = makeDictionary("Rind", "Fleisch", "Draht", "Schere", "Gesetz", "Aufgabe", "Überwachung");
+
+		Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Rindfleischüberwachungsgesetz"));
+		DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, wsTokenizer, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
+
+		CharTermAttribute termAtt = tf.getAttribute(typeof(CharTermAttribute));
+		tf.reset();
+		assertTrue(tf.incrementToken());
+		assertEquals("Rindfleischüberwachungsgesetz", termAtt.ToString());
+		assertTrue(tf.incrementToken());
+		assertEquals("Rind", termAtt.ToString());
+		tf.end();
+		tf.close();
+		wsTokenizer.Reader = new StringReader("Rindfleischüberwachungsgesetz");
+		tf.reset();
+		assertTrue(tf.incrementToken());
+		assertEquals("Rindfleischüberwachungsgesetz", termAtt.ToString());
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRetainMockAttribute() throws Exception
+	  public virtual void testRetainMockAttribute()
+	  {
+		CharArraySet dict = makeDictionary("abc", "d", "efg");
+		Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcdefg"));
+		TokenStream stream = new MockRetainAttributeFilter(tokenizer);
+		stream = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, stream, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
+		MockRetainAttribute retAtt = stream.addAttribute(typeof(MockRetainAttribute));
+		stream.reset();
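+		// the MockRetainAttributeFilter flags every token; decompounding must preserve the flag on the subword tokens it injects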
+		while (stream.incrementToken())
+		{
+		  assertTrue("Custom attribute value was lost", retAtt.Retain);
+		}
+
+	  }
+
+	  public interface MockRetainAttribute : Attribute
+	  {
+		bool Retain {set;get;}
+	  }
+
+	  public sealed class MockRetainAttributeImpl : AttributeImpl, MockRetainAttribute
+	  {
+		internal bool retain = false;
+		public override void clear()
+		{
+		  retain = false;
+		}
+		public bool Retain
+		{
+			get
+			{
+			  return retain;
+			}
+			set
+			{
+			  this.retain = value;
+			}
+		}
+		public override void copyTo(AttributeImpl target)
+		{
+		  MockRetainAttribute t = (MockRetainAttribute) target;
+		  t.Retain = retain;
+		}
+	  }
+
+	  private class MockRetainAttributeFilter : TokenFilter
+	  {
+
+		internal MockRetainAttribute retainAtt = addAttribute(typeof(MockRetainAttribute));
+
+		internal MockRetainAttributeFilter(TokenStream input) : base(input)
+		{
+		}
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+		public override bool incrementToken()
+		{
+		  if (input.incrementToken())
+		  {
+			retainAtt.Retain = true;
+			return true;
+		  }
+		  else
+		  {
+		  return false;
+		  }
+		}
+	  }
+
+	  // SOLR-2891
+	  // *CompoundWordTokenFilter blindly adds term length to offset, but this can take things out of bounds
+	  // wrt original text if a previous filter increases the length of the word (in this case ü -> ue)
+	  // so in this case we behave like WDF, and preserve any modified offsets
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testInvalidOffsets() throws Exception
+	  public virtual void testInvalidOffsets()
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.util.CharArraySet dict = makeDictionary("fall");
+		CharArraySet dict = makeDictionary("fall");
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.charfilter.NormalizeCharMap.Builder builder = new org.apache.lucene.analysis.charfilter.NormalizeCharMap.Builder();
+		NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
+		builder.add("ü", "ue");
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.charfilter.NormalizeCharMap normMap = builder.build();
+		NormalizeCharMap normMap = builder.build();
+
+		Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, dict, normMap);
+
+		assertAnalyzesTo(analyzer, "banküberfall", new string[] {"bankueberfall", "fall"}, new int[] {0, 0}, new int[] {12, 12});
+	  }
+
+	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
+	  {
+		  private readonly TestCompoundWordTokenFilter outerInstance;
+
+		  private CharArraySet dict;
+		  private NormalizeCharMap normMap;
+
+		  public AnalyzerAnonymousInnerClassHelper(TestCompoundWordTokenFilter outerInstance, CharArraySet dict, NormalizeCharMap normMap)
+		  {
+			  this.outerInstance = outerInstance;
+			  this.dict = dict;
+			  this.normMap = normMap;
+		  }
+
+
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+			TokenFilter filter = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, dict);
+			return new TokenStreamComponents(tokenizer, filter);
+		  }
+
+		  protected internal override Reader initReader(string fieldName, Reader reader)
+		  {
+			return new MappingCharFilter(normMap, reader);
+		  }
+	  }
+
+	  /// <summary>
+	  /// blast some random strings through the analyzer </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomStrings() throws Exception
+	  public virtual void testRandomStrings()
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.util.CharArraySet dict = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
+		CharArraySet dict = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
+		Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this, dict);
+		checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
+
+		InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.compound.hyphenation.HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
+		HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is);
+		Analyzer b = new AnalyzerAnonymousInnerClassHelper3(this, hyphenator);
+		checkRandomData(random(), b, 1000 * RANDOM_MULTIPLIER);
+	  }
+
+	  private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+	  {
+		  private readonly TestCompoundWordTokenFilter outerInstance;
+
+		  private CharArraySet dict;
+
+		  public AnalyzerAnonymousInnerClassHelper2(TestCompoundWordTokenFilter outerInstance, CharArraySet dict)
+		  {
+			  this.outerInstance = outerInstance;
+			  this.dict = dict;
+		  }
+
+
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+			return new TokenStreamComponents(tokenizer, new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, dict));
+		  }
+	  }
+
+	  private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+	  {
+		  private readonly TestCompoundWordTokenFilter outerInstance;
+
+		  private HyphenationTree hyphenator;
+
+		  public AnalyzerAnonymousInnerClassHelper3(TestCompoundWordTokenFilter outerInstance, HyphenationTree hyphenator)
+		  {
+			  this.outerInstance = outerInstance;
+			  this.hyphenator = hyphenator;
+		  }
+
+
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+			TokenFilter filter = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, hyphenator);
+			return new TokenStreamComponents(tokenizer, filter);
+		  }
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testEmptyTerm() throws Exception
+	  public virtual void testEmptyTerm()
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.util.CharArraySet dict = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
+		CharArraySet dict = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
+		Analyzer a = new AnalyzerAnonymousInnerClassHelper4(this, dict);
+		checkOneTerm(a, "", "");
+
+		InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.compound.hyphenation.HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
+		HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is);
+		Analyzer b = new AnalyzerAnonymousInnerClassHelper5(this, hyphenator);
+		checkOneTerm(b, "", "");
+	  }
+
+	  private class AnalyzerAnonymousInnerClassHelper4 : Analyzer
+	  {
+		  private readonly TestCompoundWordTokenFilter outerInstance;
+
+		  private CharArraySet dict;
+
+		  public AnalyzerAnonymousInnerClassHelper4(TestCompoundWordTokenFilter outerInstance, CharArraySet dict)
+		  {
+			  this.outerInstance = outerInstance;
+			  this.dict = dict;
+		  }
+
+
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer tokenizer = new KeywordTokenizer(reader);
+			return new TokenStreamComponents(tokenizer, new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, dict));
+		  }
+	  }
+
+	  private class AnalyzerAnonymousInnerClassHelper5 : Analyzer
+	  {
+		  private readonly TestCompoundWordTokenFilter outerInstance;
+
+		  private HyphenationTree hyphenator;
+
+		  public AnalyzerAnonymousInnerClassHelper5(TestCompoundWordTokenFilter outerInstance, HyphenationTree hyphenator)
+		  {
+			  this.outerInstance = outerInstance;
+			  this.hyphenator = hyphenator;
+		  }
+
+
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer tokenizer = new KeywordTokenizer(reader);
+			TokenFilter filter = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, hyphenator);
+			return new TokenStreamComponents(tokenizer, filter);
+		  }
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestDictionaryCompoundWordTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestDictionaryCompoundWordTokenFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestDictionaryCompoundWordTokenFilterFactory.cs
new file mode 100644
index 0000000..dd219c0
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestDictionaryCompoundWordTokenFilterFactory.cs
@@ -0,0 +1,60 @@
+namespace org.apache.lucene.analysis.compound
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+	/// <summary>
+	/// Simple tests to ensure the Dictionary compound filter factory is working.
+	/// </summary>
+	public class TestDictionaryCompoundWordTokenFilterFactory : BaseTokenStreamFactoryTestCase
+	{
+	  /// <summary>
+	  /// Ensure the filter actually decompounds text.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testDecompounding() throws Exception
+	  public virtual void testDecompounding()
+	  {
+		Reader reader = new StringReader("I like to play softball");
+		TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+		stream = tokenFilterFactory("DictionaryCompoundWord", "dictionary", "compoundDictionary.txt").create(stream);
+		assertTokenStreamContents(stream, new string[] {"I", "like", "to", "play", "softball", "soft", "ball"});
+	  }
+
+	  /// <summary>
+	  /// Test that bogus arguments result in exception </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBogusArguments() throws Exception
+	  public virtual void testBogusArguments()
+	  {
+		try
+		{
+		  tokenFilterFactory("DictionaryCompoundWord", "dictionary", "compoundDictionary.txt", "bogusArg", "bogusValue");
+		  fail();
+		}
+		catch (System.ArgumentException expected)
+		{
+		  assertTrue(expected.Message.contains("Unknown parameters"));
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestHyphenationCompoundWordTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestHyphenationCompoundWordTokenFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestHyphenationCompoundWordTokenFilterFactory.cs
new file mode 100644
index 0000000..f195618
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestHyphenationCompoundWordTokenFilterFactory.cs
@@ -0,0 +1,77 @@
+namespace org.apache.lucene.analysis.compound
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+	/// <summary>
+	/// Simple tests to ensure the Hyphenation compound filter factory is working.
+	/// </summary>
+	public class TestHyphenationCompoundWordTokenFilterFactory : BaseTokenStreamFactoryTestCase
+	{
+	  /// <summary>
+	  /// Ensure the factory works with hyphenation grammar+dictionary: using default options.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testHyphenationWithDictionary() throws Exception
+	  public virtual void testHyphenationWithDictionary()
+	  {
+		Reader reader = new StringReader("min veninde som er lidt af en læsehest");
+		TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+		stream = tokenFilterFactory("HyphenationCompoundWord", "hyphenator", "da_UTF8.xml", "dictionary", "da_compoundDictionary.txt").create(stream);
+
+		assertTokenStreamContents(stream, new string[] {"min", "veninde", "som", "er", "lidt", "af", "en", "læsehest", "læse", "hest"}, new int[] {1, 1, 1, 1, 1, 1, 1, 1, 0, 0});
+	  }
+
+	  /// <summary>
+	  /// Ensure the factory works with no dictionary: using hyphenation grammar only.
+	  /// Also change the min/max subword sizes from the default. When using no dictionary,
+	  /// it's generally necessary to tweak these, or you get lots of expansions.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testHyphenationOnly() throws Exception
+	  public virtual void testHyphenationOnly()
+	  {
+		Reader reader = new StringReader("basketballkurv");
+		TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+		stream = tokenFilterFactory("HyphenationCompoundWord", "hyphenator", "da_UTF8.xml", "minSubwordSize", "2", "maxSubwordSize", "4").create(stream);
+
+		assertTokenStreamContents(stream, new string[] {"basketballkurv", "ba", "sket", "bal", "ball", "kurv"});
+	  }
+
+	  /// <summary>
+	  /// Test that bogus arguments result in exception </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBogusArguments() throws Exception
+	  public virtual void testBogusArguments()
+	  {
+		try
+		{
+		  tokenFilterFactory("HyphenationCompoundWord", "hyphenator", "da_UTF8.xml", "bogusArg", "bogusValue");
+		  fail();
+		}
+		catch (System.ArgumentException expected)
+		{
+		  assertTrue(expected.Message.contains("Unknown parameters"));
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestAllAnalyzersHaveFactories.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestAllAnalyzersHaveFactories.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestAllAnalyzersHaveFactories.cs
new file mode 100644
index 0000000..0c6a4ca
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestAllAnalyzersHaveFactories.cs
@@ -0,0 +1,193 @@
+using System;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.core
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using PatternKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.PatternKeywordMarkerFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using FrenchStemFilter = org.apache.lucene.analysis.fr.FrenchStemFilter;
+	using IndicTokenizer = org.apache.lucene.analysis.@in.IndicTokenizer;
+	using DutchStemFilter = org.apache.lucene.analysis.nl.DutchStemFilter;
+	using ReversePathHierarchyTokenizer = org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
+	using TeeSinkTokenFilter = org.apache.lucene.analysis.sinks.TeeSinkTokenFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using CharFilterFactory = org.apache.lucene.analysis.util.CharFilterFactory;
+	using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
+	using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
+	using StringMockResourceLoader = org.apache.lucene.analysis.util.StringMockResourceLoader;
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+	using TokenizerFactory = org.apache.lucene.analysis.util.TokenizerFactory;
+	using LuceneTestCase = org.apache.lucene.util.LuceneTestCase;
+
+	/// <summary>
+	/// Tests that any newly added Tokenizers/TokenFilters/CharFilters have a
+	/// corresponding factory (and that the SPI configuration is correct)
+	/// </summary>
+	public class TestAllAnalyzersHaveFactories : LuceneTestCase
+	{
+
+	  // these are test-only components (e.g. test-framework)
+	  private static readonly ISet<Type> testComponents = new HashSet<Type>();
+	  static TestAllAnalyzersHaveFactories()
+	  {
+		testComponents.UnionWith(new[] { typeof(MockTokenizer), typeof(MockCharFilter), typeof(MockFixedLengthPayloadFilter), typeof(MockGraphTokenFilter), typeof(MockHoleInjectingTokenFilter), typeof(MockRandomLookaheadTokenFilter), typeof(MockTokenFilter), typeof(MockVariableLengthPayloadFilter), typeof(ValidatingTokenFilter), typeof(CrankyTokenFilter) });
+		crazyComponents.UnionWith(new[] { typeof(CachingTokenFilter), typeof(TeeSinkTokenFilter) });
+		deprecatedDuplicatedComponents.UnionWith(new[] { typeof(DutchStemFilter), typeof(FrenchStemFilter), typeof(IndicTokenizer) });
+		// ReversePathHierarchyTokenizer is supported via an option to PathHierarchyTokenizer's factory;
+		// SnowballFilter's factory is called SnowballPorterFilterFactory
+		oddlyNamedComponents.UnionWith(new[] { typeof(ReversePathHierarchyTokenizer), typeof(SnowballFilter), typeof(PatternKeywordMarkerFilter), typeof(SetKeywordMarkerFilter) });
+	  }
+
+	  // these are 'crazy' components like CachingTokenFilter. Does it make sense to add factories for these?
+	  private static readonly ISet<Type> crazyComponents = new HashSet<Type>();
+
+	  // these are deprecated components that are just exact duplicates of other functionality: they don't need factories
+	  // (they never had them)
+	  private static readonly ISet<Type> deprecatedDuplicatedComponents = new HashSet<Type>();
+
+	  // these are oddly-named (either the actual analyzer, or its factory)
+	  // they do actually have factories.
+	  // TODO: clean this up!
+	  private static readonly ISet<Type> oddlyNamedComponents = new HashSet<Type>();
+
+	  private static readonly ResourceLoader loader = new StringMockResourceLoader("");
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void test() throws Exception
+	  public virtual void test()
+	  {
+		List<Type> analysisClasses = new List<Type>();
+		analysisClasses.AddRange(TestRandomChains.getClassesForPackage("org.apache.lucene.analysis"));
+		analysisClasses.AddRange(TestRandomChains.getClassesForPackage("org.apache.lucene.collation"));
+
+		foreach (Type c in analysisClasses)
+		{
+		  if (c.IsAbstract || !c.IsPublic || c.IsNested || c.IsInterface || testComponents.Contains(c) || crazyComponents.Contains(c) || oddlyNamedComponents.Contains(c) || deprecatedDuplicatedComponents.Contains(c) || Attribute.IsDefined(c, typeof(ObsoleteAttribute)) || !(c.IsSubclassOf(typeof(Tokenizer)) || c.IsSubclassOf(typeof(TokenFilter)) || c.IsSubclassOf(typeof(CharFilter))))
+		  { // deprecated ones are typically back compat hacks
+			// don't waste time with abstract classes
+			continue;
+		  }
+
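+		  // every factory is looked up by its SPI name and constructed with at least the luceneMatchVersion argument,
+		  // e.g. (hypothetical usage): TokenFilterFactory.forName("Stop", args);
+		  // the checks below derive that SPI name from the component's class name by stripping the conventional suffix.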
+		  IDictionary<string, string> args = new Dictionary<string, string>();
+		  args["luceneMatchVersion"] = TEST_VERSION_CURRENT.ToString();
+
+		  if (c.IsSubclassOf(typeof(Tokenizer)))
+		  {
+			string clazzName = c.Name;
+			assertTrue(clazzName.EndsWith("Tokenizer", StringComparison.Ordinal));
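+			// strip the "Tokenizer" suffix (9 characters) to get the factory's SPI name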
+			string simpleName = clazzName.Substring(0, clazzName.Length - 9);
+			assertNotNull(TokenizerFactory.lookupClass(simpleName));
+			TokenizerFactory instance = null;
+			try
+			{
+			  instance = TokenizerFactory.forName(simpleName, args);
+			  assertNotNull(instance);
+			  if (instance is ResourceLoaderAware)
+			  {
+				((ResourceLoaderAware) instance).inform(loader);
+			  }
+			  assertSame(c, instance.create(new StringReader("")).GetType());
+			}
+			catch (System.ArgumentException e)
+			{
+			  if (e.InnerException is MissingMethodException)
+			  {
+				// there is no corresponding ctor available
+				throw e;
+			  }
+			  // TODO: for now pass, because some factories do not yet have a default config that always works
+			}
+		  }
+		  else if (c.IsSubclassOf(typeof(TokenFilter)))
+		  {
+			string clazzName = c.Name;
+			assertTrue(clazzName.EndsWith("Filter", StringComparison.Ordinal));
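+			// strip the "TokenFilter" (11 characters) or "Filter" (6 characters) suffix to get the factory's SPI name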
+			string simpleName = clazzName.Substring(0, clazzName.Length - (clazzName.EndsWith("TokenFilter", StringComparison.Ordinal) ? 11 : 6));
+			assertNotNull(TokenFilterFactory.lookupClass(simpleName));
+			TokenFilterFactory instance = null;
+			try
+			{
+			  instance = TokenFilterFactory.forName(simpleName, args);
+			  assertNotNull(instance);
+			  if (instance is ResourceLoaderAware)
+			  {
+				((ResourceLoaderAware) instance).inform(loader);
+			  }
+			  Type createdClazz = instance.create(new KeywordTokenizer(new StringReader(""))).GetType();
+			  // only check the instance type if the factory actually wrapped the input at all
+			  if (typeof(KeywordTokenizer) != createdClazz)
+			  {
+				assertSame(c, createdClazz);
+			  }
+			}
+			catch (System.ArgumentException e)
+			{
+			  if (e.InnerException is MissingMethodException)
+			  {
+				// there is no corresponding ctor available
+				throw e;
+			  }
+			  // TODO: for now pass, because some factories do not yet have a default config that always works
+			}
+		  }
+		  else if (c.IsSubclassOf(typeof(CharFilter)))
+		  {
+			string clazzName = c.Name;
+			assertTrue(clazzName.EndsWith("CharFilter", StringComparison.Ordinal));
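+			// strip the "CharFilter" suffix (10 characters) to get the factory's SPI name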
+			string simpleName = clazzName.Substring(0, clazzName.Length - 10);
+			assertNotNull(CharFilterFactory.lookupClass(simpleName));
+			CharFilterFactory instance = null;
+			try
+			{
+			  instance = CharFilterFactory.forName(simpleName, args);
+			  assertNotNull(instance);
+			  if (instance is ResourceLoaderAware)
+			  {
+				((ResourceLoaderAware) instance).inform(loader);
+			  }
+			  Type createdClazz = instance.create(new StringReader("")).GetType();
+			  // only check the instance type if the factory actually wrapped the input at all
+			  if (typeof(StringReader) != createdClazz)
+			  {
+				assertSame(c, createdClazz);
+			  }
+			}
+			catch (System.ArgumentException e)
+			{
+			  if (e.InnerException is MissingMethodException)
+			  {
+				// there is no corresponding ctor available
+				throw e;
+			  }
+			  // TODO: for now pass, because some factories do not yet have a default config that always works
+			}
+		  }
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestAnalyzers.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestAnalyzers.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestAnalyzers.cs
new file mode 100644
index 0000000..46680f4
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestAnalyzers.cs
@@ -0,0 +1,314 @@
+using System;
+
+namespace org.apache.lucene.analysis.core
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using org.apache.lucene.analysis;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using PayloadAttribute = org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+	using BytesRef = org.apache.lucene.util.BytesRef;
+	using Version = org.apache.lucene.util.Version;
+
+	public class TestAnalyzers : BaseTokenStreamTestCase
+	{
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testSimple() throws Exception
+	  public virtual void testSimple()
+	  {
+		Analyzer a = new SimpleAnalyzer(TEST_VERSION_CURRENT);
+		assertAnalyzesTo(a, "foo bar FOO BAR", new string[] {"foo", "bar", "foo", "bar"});
+		assertAnalyzesTo(a, "foo      bar .  FOO <> BAR", new string[] {"foo", "bar", "foo", "bar"});
+		assertAnalyzesTo(a, "foo.bar.FOO.BAR", new string[] {"foo", "bar", "foo", "bar"});
+		assertAnalyzesTo(a, "U.S.A.", new string[] {"u", "s", "a"});
+		assertAnalyzesTo(a, "C++", new string[] {"c"});
+		assertAnalyzesTo(a, "B2B", new string[] {"b", "b"});
+		assertAnalyzesTo(a, "2B", new string[] {"b"});
+		assertAnalyzesTo(a, "\"QUOTED\" word", new string[] {"quoted", "word"});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testNull() throws Exception
+	  public virtual void testNull()
+	  {
+		Analyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
+		assertAnalyzesTo(a, "foo bar FOO BAR", new string[] {"foo", "bar", "FOO", "BAR"});
+		assertAnalyzesTo(a, "foo      bar .  FOO <> BAR", new string[] {"foo", "bar", ".", "FOO", "<>", "BAR"});
+		assertAnalyzesTo(a, "foo.bar.FOO.BAR", new string[] {"foo.bar.FOO.BAR"});
+		assertAnalyzesTo(a, "U.S.A.", new string[] {"U.S.A."});
+		assertAnalyzesTo(a, "C++", new string[] {"C++"});
+		assertAnalyzesTo(a, "B2B", new string[] {"B2B"});
+		assertAnalyzesTo(a, "2B", new string[] {"2B"});
+		assertAnalyzesTo(a, "\"QUOTED\" word", new string[] {"\"QUOTED\"", "word"});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testStop() throws Exception
+	  public virtual void testStop()
+	  {
+		Analyzer a = new StopAnalyzer(TEST_VERSION_CURRENT);
+		assertAnalyzesTo(a, "foo bar FOO BAR", new string[] {"foo", "bar", "foo", "bar"});
+		assertAnalyzesTo(a, "foo a bar such FOO THESE BAR", new string[] {"foo", "bar", "foo", "bar"});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: void verifyPayload(TokenStream ts) throws java.io.IOException
+	  internal virtual void verifyPayload(TokenStream ts)
+	  {
+		PayloadAttribute payloadAtt = ts.getAttribute(typeof(PayloadAttribute));
+		ts.reset();
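+		// PayloadSetter (defined at the bottom of this file) stamps an increasing byte value (1, 2, 3, ...) on each token, so the payload must match the loop counter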
+		for (sbyte b = 1;;b++)
+		{
+		  bool hasNext = ts.incrementToken();
+		  if (!hasNext)
+		  {
+			  break;
+		  }
+		  // System.out.println("id="+System.identityHashCode(nextToken) + " " + t);
+		  // System.out.println("payload=" + (int)nextToken.getPayload().toByteArray()[0]);
+		  assertEquals(b, payloadAtt.Payload.bytes[0]);
+		}
+	  }
+
+	  // Make sure old style next() calls result in a new copy of payloads
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testPayloadCopy() throws java.io.IOException
+	  public virtual void testPayloadCopy()
+	  {
+		string s = "how now brown cow";
+		TokenStream ts;
+		ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(s));
+		ts = new PayloadSetter(ts);
+		verifyPayload(ts);
+
+		ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(s));
+		ts = new PayloadSetter(ts);
+		verifyPayload(ts);
+	  }
+
+	  // LUCENE-1150: Just a compile time test, to ensure the
+	  // StandardAnalyzer constants remain publicly accessible
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @SuppressWarnings("unused") public void _testStandardConstants()
+	  public virtual void _testStandardConstants()
+	  {
+		int x = StandardTokenizer.ALPHANUM;
+		x = StandardTokenizer.APOSTROPHE;
+		x = StandardTokenizer.ACRONYM;
+		x = StandardTokenizer.COMPANY;
+		x = StandardTokenizer.EMAIL;
+		x = StandardTokenizer.HOST;
+		x = StandardTokenizer.NUM;
+		x = StandardTokenizer.CJ;
+		string[] y = StandardTokenizer.TOKEN_TYPES;
+	  }
+
+	  private class LowerCaseWhitespaceAnalyzer : Analyzer
+	  {
+
+		public override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		{
+		  Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+		  return new TokenStreamComponents(tokenizer, new LowerCaseFilter(TEST_VERSION_CURRENT, tokenizer));
+		}
+
+	  }
+
+	  private class UpperCaseWhitespaceAnalyzer : Analyzer
+	  {
+
+		public override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		{
+		  Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+		  return new TokenStreamComponents(tokenizer, new UpperCaseFilter(TEST_VERSION_CURRENT, tokenizer));
+		}
+
+	  }
+
+
+	  /// <summary>
+	  /// Test that LowerCaseFilter handles the entire Unicode range correctly
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLowerCaseFilter() throws java.io.IOException
+	  public virtual void testLowerCaseFilter()
+	  {
+		Analyzer a = new LowerCaseWhitespaceAnalyzer();
+		// BMP
+		assertAnalyzesTo(a, "AbaCaDabA", new string[] {"abacadaba"});
+		// supplementary
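+		// the surrogate pair \ud801\udc16 encodes U+10416 (a Deseret capital letter), which must be lowercased to U+1043E across the pair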
+		assertAnalyzesTo(a, "\ud801\udc16\ud801\udc16\ud801\udc16\ud801\udc16", new string[] {"\ud801\udc3e\ud801\udc3e\ud801\udc3e\ud801\udc3e"});
+		assertAnalyzesTo(a, "AbaCa\ud801\udc16DabA", new string[] {"abaca\ud801\udc3edaba"});
+		// unpaired lead surrogate
+		assertAnalyzesTo(a, "AbaC\uD801AdaBa", new string [] {"abac\uD801adaba"});
+		// unpaired trail surrogate
+		assertAnalyzesTo(a, "AbaC\uDC16AdaBa", new string [] {"abac\uDC16adaba"});
+	  }
+
+	  /// <summary>
+	  /// Test that UpperCaseFilter handles the entire Unicode range correctly
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testUpperCaseFilter() throws java.io.IOException
+	  public virtual void testUpperCaseFilter()
+	  {
+		Analyzer a = new UpperCaseWhitespaceAnalyzer();
+		// BMP
+		assertAnalyzesTo(a, "AbaCaDabA", new string[] {"ABACADABA"});
+		// supplementary
+		assertAnalyzesTo(a, "\ud801\udc3e\ud801\udc3e\ud801\udc3e\ud801\udc3e", new string[] {"\ud801\udc16\ud801\udc16\ud801\udc16\ud801\udc16"});
+		assertAnalyzesTo(a, "AbaCa\ud801\udc3eDabA", new string[] {"ABACA\ud801\udc16DABA"});
+		// unpaired lead surrogate
+		assertAnalyzesTo(a, "AbaC\uD801AdaBa", new string [] {"ABAC\uD801ADABA"});
+		// unpaired trail surrogate
+		assertAnalyzesTo(a, "AbaC\uDC16AdaBa", new string [] {"ABAC\uDC16ADABA"});
+	  }
+
+
+	  /// <summary>
+	  /// Test that LowerCaseFilter lowercases correctly when the term buffer has a
+	  /// leftover trailing surrogate character beyond the term's length and the
+	  /// current term ends with a corresponding leading surrogate.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLowerCaseFilterLowSurrogateLeftover() throws java.io.IOException
+	  public virtual void testLowerCaseFilterLowSurrogateLeftover()
+	  {
+		// test that the limit of the term buffer is respected when handling supplementary
+		// characters
+		WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("BogustermBogusterm\udc16"));
+		LowerCaseFilter filter = new LowerCaseFilter(TEST_VERSION_CURRENT, tokenizer);
+		assertTokenStreamContents(filter, new string[] {"bogustermbogusterm\udc16"});
+		filter.reset();
+		string highSurEndingUpper = "BogustermBoguster\ud801";
+		string highSurEndingLower = "bogustermboguster\ud801";
+		tokenizer.Reader = new StringReader(highSurEndingUpper);
+		assertTokenStreamContents(filter, new string[] {highSurEndingLower});
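+		// the unpaired high surrogate at the end of the term must pass through LowerCaseFilter untouched; verify it is still present in the term buffer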
+		assertTrue(filter.hasAttribute(typeof(CharTermAttribute)));
+		char[] termBuffer = filter.getAttribute(typeof(CharTermAttribute)).buffer();
+		int length = highSurEndingLower.Length;
+		assertEquals('\ud801', termBuffer[length - 1]);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLowerCaseTokenizer() throws java.io.IOException
+	  public virtual void testLowerCaseTokenizer()
+	  {
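+		// LowerCaseTokenizer splits on non-letters and lowercases, including supplementary characters (U+1041C -> U+10444)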
+		StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
+		LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, reader);
+		assertTokenStreamContents(tokenizer, new string[] {"tokenizer", "\ud801\udc44test"});
+	  }
+
+	  /// @deprecated (3.1) 
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Deprecated("(3.1)") public void testLowerCaseTokenizerBWCompat() throws java.io.IOException
+	  [Obsolete("(3.1)")]
+	  public virtual void testLowerCaseTokenizerBWCompat()
+	  {
+		StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
+		LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(Version.LUCENE_30, reader);
+		assertTokenStreamContents(tokenizer, new string[] {"tokenizer", "test"});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testWhitespaceTokenizer() throws java.io.IOException
+	  public virtual void testWhitespaceTokenizer()
+	  {
+		StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
+		WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+		assertTokenStreamContents(tokenizer, new string[] {"Tokenizer", "\ud801\udc1ctest"});
+	  }
+
+	  /// @deprecated (3.1) 
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Deprecated("(3.1)") public void testWhitespaceTokenizerBWCompat() throws java.io.IOException
+	  [Obsolete("(3.1)")]
+	  public virtual void testWhitespaceTokenizerBWCompat()
+	  {
+		StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
+		WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_30, reader);
+		assertTokenStreamContents(tokenizer, new string[] {"Tokenizer", "\ud801\udc1ctest"});
+	  }
+
+	  /// <summary>
+	  /// blast some random strings through the analyzer </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomStrings() throws Exception
+	  public virtual void testRandomStrings()
+	  {
+		checkRandomData(random(), new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 1000 * RANDOM_MULTIPLIER);
+		checkRandomData(random(), new SimpleAnalyzer(TEST_VERSION_CURRENT), 1000 * RANDOM_MULTIPLIER);
+		checkRandomData(random(), new StopAnalyzer(TEST_VERSION_CURRENT), 1000 * RANDOM_MULTIPLIER);
+	  }
+
+	  /// <summary>
+	  /// blast some random large strings through the analyzer </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomHugeStrings() throws Exception
+	  public virtual void testRandomHugeStrings()
+	  {
+		Random random = random();
+		checkRandomData(random, new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 100 * RANDOM_MULTIPLIER, 8192);
+		checkRandomData(random, new SimpleAnalyzer(TEST_VERSION_CURRENT), 100 * RANDOM_MULTIPLIER, 8192);
+		checkRandomData(random, new StopAnalyzer(TEST_VERSION_CURRENT), 100 * RANDOM_MULTIPLIER, 8192);
+	  }
+	}
+
+	internal sealed class PayloadSetter : TokenFilter
+	{
+		private bool InstanceFieldsInitialized = false;
+
+		private void InitializeInstanceFields()
+		{
+			p = new BytesRef(data,0,1);
+		}
+
+	  internal PayloadAttribute payloadAtt;
+	  public PayloadSetter(TokenStream input) : base(input)
+	  {
+		  if (!InstanceFieldsInitialized)
+		  {
+			  InitializeInstanceFields();
+			  InstanceFieldsInitialized = true;
+		  }
+		payloadAtt = addAttribute(typeof(PayloadAttribute));
+	  }
+
+	  internal sbyte[] data = new sbyte[1];
+	  internal BytesRef p;
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		bool hasNext = input.incrementToken();
+		if (!hasNext)
+		{
+			return false;
+		}
+		payloadAtt.Payload = p; // reuse the payload / byte[]
+		data[0]++;
+		return true;
+	  }
+	}
+
+}
\ No newline at end of file

