lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [17/52] [abbrv] lucenenet git commit: Ported Analysis.Compound namespace + tests
Date Thu, 01 Sep 2016 14:39:38 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestCompoundWordTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestCompoundWordTokenFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestCompoundWordTokenFilter.cs
index 7aa8a77..1feb390 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestCompoundWordTokenFilter.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestCompoundWordTokenFilter.cs
@@ -1,7 +1,16 @@
-namespace org.apache.lucene.analysis.compound
+using Lucene.Net.Analysis.CharFilters;
+using Lucene.Net.Analysis.Compound.Hyphenation;
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Compound
 {
-
-	/*
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,395 +27,370 @@
 	 * limitations under the License.
 	 */
 
+    public class TestCompoundWordTokenFilter : BaseTokenStreamTestCase
+    {
+
+        private static CharArraySet makeDictionary(params string[] dictionary)
+        {
+            return new CharArraySet(TEST_VERSION_CURRENT, Arrays.AsList(dictionary), true);
+        }
 
-	using MappingCharFilter = org.apache.lucene.analysis.charfilter.MappingCharFilter;
-	using NormalizeCharMap = org.apache.lucene.analysis.charfilter.NormalizeCharMap;
-	using HyphenationTree = org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
-	using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
-	using WhitespaceTokenizer = org.apache.lucene.analysis.core.WhitespaceTokenizer;
-	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
-	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-	using Attribute = org.apache.lucene.util.Attribute;
-	using AttributeImpl = org.apache.lucene.util.AttributeImpl;
-	using InputSource = org.xml.sax.InputSource;
-
-	public class TestCompoundWordTokenFilter : BaseTokenStreamTestCase
-	{
-
-	  private static CharArraySet makeDictionary(params string[] dictionary)
-	  {
-		return new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(dictionary), true);
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testHyphenationCompoundWordsDA() throws Exception
-	  public virtual void testHyphenationCompoundWordsDA()
-	  {
-		CharArraySet dict = makeDictionary("læse", "hest");
-
-		InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
-		HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is);
-
-		HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("min veninde som er lidt af en læsehest"), MockTokenizer.WHITESPACE, false), hyphenator, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
-		assertTokenStreamContents(tf, new string[] {"min", "veninde", "som", "er", "lidt", "af", "en", "læsehest", "læse", "hest"}, new int[] {1, 1, 1, 1, 1, 1, 1, 1, 0, 0});
-	  }
+        [Test]
+        public virtual void TestHyphenationCompoundWordsDA()
+        {
+            CharArraySet dict = makeDictionary("læse", "hest");
 
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testHyphenationCompoundWordsDELongestMatch() throws Exception
-	  public virtual void testHyphenationCompoundWordsDELongestMatch()
-	  {
-		CharArraySet dict = makeDictionary("basketball", "basket", "ball", "kurv");
-
-		InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
-		HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is);
-
-		// the word basket will not be added due to the longest match option
-		HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, 40, true);
-		assertTokenStreamContents(tf, new string[] {"basketballkurv", "basketball", "ball", "kurv"}, new int[] {1, 0, 0, 0});
-
-	  }
-
-	  /// <summary>
-	  /// With hyphenation-only, you can get a lot of nonsense tokens.
-	  /// This can be controlled with the min/max subword size.
-	  /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testHyphenationOnly() throws Exception
-	  public virtual void testHyphenationOnly()
-	  {
-		InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
-		HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is);
-
-		HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, 2, 4);
-
-		// min=2, max=4
-		assertTokenStreamContents(tf, new string[] {"basketballkurv", "ba", "sket", "bal", "ball", "kurv"});
-
-		tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, 4, 6);
-
-		// min=4, max=6
-		assertTokenStreamContents(tf, new string[] {"basketballkurv", "basket", "sket", "ball", "lkurv", "kurv"});
-
-		tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, 4, 10);
-
-		// min=4, max=10
-		assertTokenStreamContents(tf, new string[] {"basketballkurv", "basket", "basketbal", "basketball", "sket", "sketbal", "sketball", "ball", "ballkurv", "lkurv", "kurv"});
-
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testDumbCompoundWordsSE() throws Exception
-	  public virtual void testDumbCompoundWordsSE()
-	  {
-		CharArraySet dict = makeDictionary("Bil", "Dörr", "Motor", "Tak", "Borr", "Slag", "Hammar", "Pelar", "Glas", "Ögon", "Fodral", "Bas", "Fiol", "Makare", "Gesäll", "Sko", "Vind", "Rute", "Torkare", "Blad");
-
-		DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("Bildörr Bilmotor Biltak Slagborr Hammarborr Pelarborr Glasögonfodral Basfiolsfodral Basfiolsfodralmakaregesäll Skomakare Vindrutetorkare Vindrutetorkarblad abba"), MockTokenizer.WHITESPACE, false), dict);
-
-		assertTokenStreamContents(tf, new string[] {"Bildörr", "Bil", "dörr", "Bilmotor", "Bil", "motor", "Biltak", "Bil", "tak", "Slagborr", "Slag", "borr", "Hammarborr", "Hammar", "borr", "Pelarborr", "Pelar", "borr", "Glasögonfodral", "Glas", "ögon", "fodral", "Basfiolsfodral", "Bas", "fiol", "fodral", "Basfiolsfodralmakaregesäll", "Bas", "fiol", "fodral", "makare", "gesäll", "Skomakare", "Sko", "makare", "Vindrutetorkare", "Vind", "rute", "torkare", "Vindrutetorkarblad", "Vind", "rute", "blad", "abba"}, new int[] {0, 0, 0, 8, 8, 8, 17, 17, 17, 24, 24, 24, 33, 33, 33, 44, 44, 44, 54, 54, 54, 54, 69, 69, 69, 69, 84, 84, 84, 84, 84, 84, 111, 111, 111, 121, 121, 121, 121, 137, 137, 137, 137, 156}, new int[] {7, 7, 7, 16, 16, 16, 23, 23, 23, 32, 32, 32, 43, 43, 43, 53, 53, 53, 68, 68, 68, 68, 83, 83, 83, 83, 110, 110, 110, 110, 110, 110, 120, 120, 120, 136, 136, 136, 136, 155, 155, 155, 155, 160}, new int[] {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1});
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testDumbCompoundWordsSELongestMatch() throws Exception
-	  public virtual void testDumbCompoundWordsSELongestMatch()
-	  {
-		CharArraySet dict = makeDictionary("Bil", "Dörr", "Motor", "Tak", "Borr", "Slag", "Hammar", "Pelar", "Glas", "Ögon", "Fodral", "Bas", "Fiols", "Makare", "Gesäll", "Sko", "Vind", "Rute", "Torkare", "Blad", "Fiolsfodral");
-
-		DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("Basfiolsfodralmakaregesäll"), MockTokenizer.WHITESPACE, false), dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, true);
-
-		assertTokenStreamContents(tf, new string[] {"Basfiolsfodralmakaregesäll", "Bas", "fiolsfodral", "fodral", "makare", "gesäll"}, new int[] {0, 0, 0, 0, 0, 0}, new int[] {26, 26, 26, 26, 26, 26}, new int[] {1, 0, 0, 0, 0, 0});
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testTokenEndingWithWordComponentOfMinimumLength() throws Exception
-	  public virtual void testTokenEndingWithWordComponentOfMinimumLength()
-	  {
-		CharArraySet dict = makeDictionary("ab", "cd", "ef");
-
-		DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcdef")
-		   ), dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
-
-		assertTokenStreamContents(tf, new string[] {"abcdef", "ab", "cd", "ef"}, new int[] {0, 0, 0, 0}, new int[] {6, 6, 6, 6}, new int[] {1, 0, 0, 0});
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testWordComponentWithLessThanMinimumLength() throws Exception
-	  public virtual void testWordComponentWithLessThanMinimumLength()
-	  {
-		CharArraySet dict = makeDictionary("abc", "d", "efg");
-
-		DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcdefg")
-		   ), dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
-
-	  // since "d" is shorter than the minimum subword size, it should not be added to the token stream
-		assertTokenStreamContents(tf, new string[] {"abcdefg", "abc", "efg"}, new int[] {0, 0, 0}, new int[] {7, 7, 7}, new int[] {1, 0, 0});
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testReset() throws Exception
-	  public virtual void testReset()
-	  {
-		CharArraySet dict = makeDictionary("Rind", "Fleisch", "Draht", "Schere", "Gesetz", "Aufgabe", "Überwachung");
-
-		Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Rindfleischüberwachungsgesetz"));
-		DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, wsTokenizer, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
-
-		CharTermAttribute termAtt = tf.getAttribute(typeof(CharTermAttribute));
-		tf.reset();
-		assertTrue(tf.incrementToken());
-		assertEquals("Rindfleischüberwachungsgesetz", termAtt.ToString());
-		assertTrue(tf.incrementToken());
-		assertEquals("Rind", termAtt.ToString());
-		tf.end();
-		tf.close();
-		wsTokenizer.Reader = new StringReader("Rindfleischüberwachungsgesetz");
-		tf.reset();
-		assertTrue(tf.incrementToken());
-		assertEquals("Rindfleischüberwachungsgesetz", termAtt.ToString());
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testRetainMockAttribute() throws Exception
-	  public virtual void testRetainMockAttribute()
-	  {
-		CharArraySet dict = makeDictionary("abc", "d", "efg");
-		Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcdefg"));
-		TokenStream stream = new MockRetainAttributeFilter(tokenizer);
-		stream = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, stream, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
-		MockRetainAttribute retAtt = stream.addAttribute(typeof(MockRetainAttribute));
-		stream.reset();
-		while (stream.incrementToken())
-		{
-		  assertTrue("Custom attribute value was lost", retAtt.Retain);
-		}
-
-	  }
-
-	  public interface MockRetainAttribute : Attribute
-	  {
-		bool Retain {set;get;}
-	  }
-
-	  public sealed class MockRetainAttributeImpl : AttributeImpl, MockRetainAttribute
-	  {
-		internal bool retain = false;
-		public override void clear()
-		{
-		  retain = false;
-		}
-		public bool Retain
-		{
-			get
-			{
-			  return retain;
-			}
-			set
-			{
-			  this.retain = value;
-			}
-		}
-		public override void copyTo(AttributeImpl target)
-		{
-		  MockRetainAttribute t = (MockRetainAttribute) target;
-		  t.Retain = retain;
-		}
-	  }
-
-	  private class MockRetainAttributeFilter : TokenFilter
-	  {
-
-		internal MockRetainAttribute retainAtt = addAttribute(typeof(MockRetainAttribute));
-
-		internal MockRetainAttributeFilter(TokenStream input) : base(input)
-		{
-		}
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
-		public override bool incrementToken()
-		{
-		  if (input.incrementToken())
-		  {
-			retainAtt.Retain = true;
-			return true;
-		  }
-		  else
-		  {
-		  return false;
-		  }
-		}
-	  }
-
-	  // SOLR-2891
-	  // *CompoundWordTokenFilter blindly adds term length to offset, but this can take things out of bounds
-	  // wrt original text if a previous filter increases the length of the word (in this case ü -> ue)
-	  // so in this case we behave like WDF, and preserve any modified offsets
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testInvalidOffsets() throws Exception
-	  public virtual void testInvalidOffsets()
-	  {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final org.apache.lucene.analysis.util.CharArraySet dict = makeDictionary("fall");
-		CharArraySet dict = makeDictionary("fall");
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final org.apache.lucene.analysis.charfilter.NormalizeCharMap.Builder builder = new org.apache.lucene.analysis.charfilter.NormalizeCharMap.Builder();
-		NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
-		builder.add("ü", "ue");
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final org.apache.lucene.analysis.charfilter.NormalizeCharMap normMap = builder.build();
-		NormalizeCharMap normMap = builder.build();
-
-		Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, dict, normMap);
-
-		assertAnalyzesTo(analyzer, "banküberfall", new string[] {"bankueberfall", "fall"}, new int[] {0, 0}, new int[] {12, 12});
-	  }
-
-	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
-	  {
-		  private readonly TestCompoundWordTokenFilter outerInstance;
-
-		  private CharArraySet dict;
-		  private NormalizeCharMap normMap;
-
-		  public AnalyzerAnonymousInnerClassHelper(TestCompoundWordTokenFilter outerInstance, CharArraySet dict, NormalizeCharMap normMap)
-		  {
-			  this.outerInstance = outerInstance;
-			  this.dict = dict;
-			  this.normMap = normMap;
-		  }
-
-
-		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
-		  {
-			Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-			TokenFilter filter = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, dict);
-			return new TokenStreamComponents(tokenizer, filter);
-		  }
-
-		  protected internal override Reader initReader(string fieldName, Reader reader)
-		  {
-			return new MappingCharFilter(normMap, reader);
-		  }
-	  }
-
-	  /// <summary>
-	  /// blast some random strings through the analyzer </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testRandomStrings() throws Exception
-	  public virtual void testRandomStrings()
-	  {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final org.apache.lucene.analysis.util.CharArraySet dict = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
-		CharArraySet dict = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
-		Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this, dict);
-		checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
-
-		InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final org.apache.lucene.analysis.compound.hyphenation.HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
-		HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is);
-		Analyzer b = new AnalyzerAnonymousInnerClassHelper3(this, hyphenator);
-		checkRandomData(random(), b, 1000 * RANDOM_MULTIPLIER);
-	  }
-
-	  private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
-	  {
-		  private readonly TestCompoundWordTokenFilter outerInstance;
-
-		  private CharArraySet dict;
-
-		  public AnalyzerAnonymousInnerClassHelper2(TestCompoundWordTokenFilter outerInstance, CharArraySet dict)
-		  {
-			  this.outerInstance = outerInstance;
-			  this.dict = dict;
-		  }
-
-
-		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
-		  {
-			Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-			return new TokenStreamComponents(tokenizer, new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, dict));
-		  }
-	  }
-
-	  private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
-	  {
-		  private readonly TestCompoundWordTokenFilter outerInstance;
-
-		  private HyphenationTree hyphenator;
-
-		  public AnalyzerAnonymousInnerClassHelper3(TestCompoundWordTokenFilter outerInstance, HyphenationTree hyphenator)
-		  {
-			  this.outerInstance = outerInstance;
-			  this.hyphenator = hyphenator;
-		  }
-
-
-		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
-		  {
-			Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-			TokenFilter filter = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, hyphenator);
-			return new TokenStreamComponents(tokenizer, filter);
-		  }
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testEmptyTerm() throws Exception
-	  public virtual void testEmptyTerm()
-	  {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final org.apache.lucene.analysis.util.CharArraySet dict = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
-		CharArraySet dict = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
-		Analyzer a = new AnalyzerAnonymousInnerClassHelper4(this, dict);
-		checkOneTerm(a, "", "");
-
-		InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final org.apache.lucene.analysis.compound.hyphenation.HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
-		HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is);
-		Analyzer b = new AnalyzerAnonymousInnerClassHelper5(this, hyphenator);
-		checkOneTerm(b, "", "");
-	  }
-
-	  private class AnalyzerAnonymousInnerClassHelper4 : Analyzer
-	  {
-		  private readonly TestCompoundWordTokenFilter outerInstance;
-
-		  private CharArraySet dict;
-
-		  public AnalyzerAnonymousInnerClassHelper4(TestCompoundWordTokenFilter outerInstance, CharArraySet dict)
-		  {
-			  this.outerInstance = outerInstance;
-			  this.dict = dict;
-		  }
-
-
-		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
-		  {
-			Tokenizer tokenizer = new KeywordTokenizer(reader);
-			return new TokenStreamComponents(tokenizer, new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, dict));
-		  }
-	  }
-
-	  private class AnalyzerAnonymousInnerClassHelper5 : Analyzer
-	  {
-		  private readonly TestCompoundWordTokenFilter outerInstance;
-
-		  private HyphenationTree hyphenator;
-
-		  public AnalyzerAnonymousInnerClassHelper5(TestCompoundWordTokenFilter outerInstance, HyphenationTree hyphenator)
-		  {
-			  this.outerInstance = outerInstance;
-			  this.hyphenator = hyphenator;
-		  }
-
-
-		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
-		  {
-			Tokenizer tokenizer = new KeywordTokenizer(reader);
-			TokenFilter filter = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, hyphenator);
-			return new TokenStreamComponents(tokenizer, filter);
-		  }
-	  }
-	}
+            //InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
+            using (var @is = this.GetType().getResourceAsStream("da_UTF8.xml"))
+            {
+                HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.GetHyphenationTree(@is);
 
+                HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("min veninde som er lidt af en læsehest"), MockTokenizer.WHITESPACE, false), hyphenator, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
+                AssertTokenStreamContents(tf, new string[] { "min", "veninde", "som", "er", "lidt", "af", "en", "læsehest", "læse", "hest" }, new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 });
+            }
+        }
+
+        [Test]
+        public virtual void TestHyphenationCompoundWordsDELongestMatch()
+        {
+            CharArraySet dict = makeDictionary("basketball", "basket", "ball", "kurv");
+
+            //InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
+            using (var @is = this.GetType().getResourceAsStream("da_UTF8.xml"))
+            {
+                HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.GetHyphenationTree(@is);
+
+                // the word basket will not be added due to the longest match option
+                HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, 40, true);
+                AssertTokenStreamContents(tf, new string[] { "basketballkurv", "basketball", "ball", "kurv" }, new int[] { 1, 0, 0, 0 });
+            }
+        }
+
+        /// <summary>
+        /// With hyphenation-only, you can get a lot of nonsense tokens.
+        /// This can be controlled with the min/max subword size.
+        /// </summary>
+        [Test]
+        public virtual void TestHyphenationOnly()
+        {
+            //InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
+            using (var @is = this.GetType().getResourceAsStream("da_UTF8.xml"))
+            {
+                HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.GetHyphenationTree(@is);
+
+                HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, 2, 4);
+
+                // min=2, max=4
+                AssertTokenStreamContents(tf, new string[] { "basketballkurv", "ba", "sket", "bal", "ball", "kurv" });
+
+                tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, 4, 6);
+
+                // min=4, max=6
+                AssertTokenStreamContents(tf, new string[] { "basketballkurv", "basket", "sket", "ball", "lkurv", "kurv" });
+
+                tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, 4, 10);
+
+                // min=4, max=10
+                AssertTokenStreamContents(tf, new string[] { "basketballkurv", "basket", "basketbal", "basketball", "sket", "sketbal", "sketball", "ball", "ballkurv", "lkurv", "kurv" });
+            }
+        }
+
+        [Test]
+        public virtual void TestDumbCompoundWordsSE()
+        {
+            CharArraySet dict = makeDictionary("Bil", "Dörr", "Motor", "Tak", "Borr", "Slag", "Hammar", "Pelar", "Glas", "Ögon", "Fodral", "Bas", "Fiol", "Makare", "Gesäll", "Sko", "Vind", "Rute", "Torkare", "Blad");
+
+            DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("Bildörr Bilmotor Biltak Slagborr Hammarborr Pelarborr Glasögonfodral Basfiolsfodral Basfiolsfodralmakaregesäll Skomakare Vindrutetorkare Vindrutetorkarblad abba"), MockTokenizer.WHITESPACE, false), dict);
+
+            AssertTokenStreamContents(tf, new string[] { "Bildörr", "Bil", "dörr", "Bilmotor", "Bil", "motor", "Biltak", "Bil", "tak", "Slagborr", "Slag", "borr", "Hammarborr", "Hammar", "borr", "Pelarborr", "Pelar", "borr", "Glasögonfodral", "Glas", "ögon", "fodral", "Basfiolsfodral", "Bas", "fiol", "fodral", "Basfiolsfodralmakaregesäll", "Bas", "fiol", "fodral", "makare", "gesäll", "Skomakare", "Sko", "makare", "Vindrutetorkare", "Vind", "rute", "torkare", "Vindrutetorkarblad", "Vind", "rute", "blad", "abba" }, new int[] { 0, 0, 0, 8, 8, 8, 17, 17, 17, 24, 24, 24, 33, 33, 33, 44, 44, 44, 54, 54, 54, 54, 69, 69, 69, 69, 84, 84, 84, 84, 84, 84, 111, 111, 111, 121, 121, 121, 121, 137, 137, 137, 137, 156 }, new int[] { 7, 7, 7, 16, 16, 16, 23, 23, 23, 32, 32, 32, 43, 43, 43, 53, 53, 53, 68, 68, 68, 68, 83, 83, 83, 83, 110, 110, 110, 110, 110, 110, 120, 120, 120, 136, 136, 136, 136, 155, 155, 155, 155, 160 }, new int[] { 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1 });
+        }
+
+        [Test]
+        public virtual void TestDumbCompoundWordsSELongestMatch()
+        {
+            CharArraySet dict = makeDictionary("Bil", "Dörr", "Motor", "Tak", "Borr", "Slag", "Hammar", "Pelar", "Glas", "Ögon", "Fodral", "Bas", "Fiols", "Makare", "Gesäll", "Sko", "Vind", "Rute", "Torkare", "Blad", "Fiolsfodral");
+
+            DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("Basfiolsfodralmakaregesäll"), MockTokenizer.WHITESPACE, false), dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, true);
+
+            AssertTokenStreamContents(tf, new string[] { "Basfiolsfodralmakaregesäll", "Bas", "fiolsfodral", "fodral", "makare", "gesäll" }, new int[] { 0, 0, 0, 0, 0, 0 }, new int[] { 26, 26, 26, 26, 26, 26 }, new int[] { 1, 0, 0, 0, 0, 0 });
+        }
+
+        [Test]
+        public virtual void TestTokenEndingWithWordComponentOfMinimumLength()
+        {
+            CharArraySet dict = makeDictionary("ab", "cd", "ef");
+
+            DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcdef")
+               ), dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
+
+            AssertTokenStreamContents(tf, new string[] { "abcdef", "ab", "cd", "ef" }, new int[] { 0, 0, 0, 0 }, new int[] { 6, 6, 6, 6 }, new int[] { 1, 0, 0, 0 });
+        }
+
+        [Test]
+        public virtual void TestWordComponentWithLessThanMinimumLength()
+        {
+            CharArraySet dict = makeDictionary("abc", "d", "efg");
+
+            DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcdefg")
+               ), dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
+
+            // since "d" is shorter than the minimum subword size, it should not be added to the token stream
+            AssertTokenStreamContents(tf, new string[] { "abcdefg", "abc", "efg" }, new int[] { 0, 0, 0 }, new int[] { 7, 7, 7 }, new int[] { 1, 0, 0 });
+        }
+
+        /// <summary>
+        /// Checks that the filter can be consumed again after the tokenizer is given a new
+        /// reader: the stream is read partially, ended and disposed, then reset and read
+        /// from the first token once more.
+        /// </summary>
+        [Test]
+        public virtual void TestReset()
+        {
+            CharArraySet dict = makeDictionary("Rind", "Fleisch", "Draht", "Schere", "Gesetz", "Aufgabe", "Überwachung");
+
+            Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Rindfleischüberwachungsgesetz"));
+            DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, wsTokenizer, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
+
+            ICharTermAttribute termAtt = tf.GetAttribute<ICharTermAttribute>();
+
+            // First pass: read the original token and its first subword, then stop early.
+            tf.Reset();
+            assertTrue(tf.IncrementToken());
+            assertEquals("Rindfleischüberwachungsgesetz", termAtt.ToString());
+            assertTrue(tf.IncrementToken());
+            assertEquals("Rind", termAtt.ToString());
+            tf.End();
+            tf.Dispose();
+
+            // Second pass: with a fresh reader, Reset() must start over at the original token.
+            wsTokenizer.Reader = new StringReader("Rindfleischüberwachungsgesetz");
+            tf.Reset();
+            assertTrue(tf.IncrementToken());
+            assertEquals("Rindfleischüberwachungsgesetz", termAtt.ToString());
+
+            // Finish the second pass the same way as the first instead of leaving it open.
+            tf.End();
+            tf.Dispose();
+        }
+
+        /// <summary>
+        /// Checks that the custom retain attribute set by MockRetainAttributeFilter is still
+        /// true on every token produced by the compound word filter.
+        /// </summary>
+        [Test]
+        public virtual void TestRetainMockAttribute()
+        {
+            CharArraySet dict = makeDictionary("abc", "d", "efg");
+            Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcdefg"));
+            TokenStream stream = new MockRetainAttributeFilter(tokenizer);
+            stream = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, stream, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
+            IMockRetainAttribute retAtt = stream.AddAttribute<IMockRetainAttribute>();
+            try
+            {
+                stream.Reset();
+                while (stream.IncrementToken())
+                {
+                    assertTrue("Custom attribute value was lost", retAtt.Retain);
+                }
+
+                // Complete the consumer workflow once the stream is exhausted.
+                stream.End();
+            }
+            finally
+            {
+                // Always dispose the stream, even when an assertion above fails.
+                stream.Dispose();
+            }
+        }
+
+        /// <summary>
+        /// Test-only attribute that carries a single boolean flag.
+        /// </summary>
+        public interface IMockRetainAttribute : IAttribute
+        {
+            bool Retain { get; set; }
+        }
+
+        /// <summary>
+        /// Implementation of <see cref="IMockRetainAttribute"/> backed by a plain boolean field.
+        /// </summary>
+        public sealed class MockRetainAttribute : Attribute, IMockRetainAttribute
+        {
+            internal bool retain = false;
+
+            public bool Retain
+            {
+                get { return this.retain; }
+                set { this.retain = value; }
+            }
+
+            // Puts the flag back to false.
+            public override void Clear()
+            {
+                this.retain = false;
+            }
+
+            // Copies the flag onto the target, which is cast to IMockRetainAttribute.
+            public override void CopyTo(Attribute target)
+            {
+                ((IMockRetainAttribute)target).Retain = this.retain;
+            }
+        }
+
+        /// <summary>
+        /// Pass-through filter that sets <see cref="IMockRetainAttribute.Retain"/> to true
+        /// on every token it receives from its input.
+        /// </summary>
+        private class MockRetainAttributeFilter : TokenFilter
+        {
+            internal IMockRetainAttribute retainAtt;
+
+            internal MockRetainAttributeFilter(TokenStream input)
+                    : base(input)
+            {
+                this.retainAtt = AddAttribute<IMockRetainAttribute>();
+            }
+
+            public sealed override bool IncrementToken()
+            {
+                // Nothing to mark once the input has no more tokens.
+                if (!input.IncrementToken())
+                {
+                    return false;
+                }
+
+                retainAtt.Retain = true;
+                return true;
+            }
+        }
+
+        // SOLR-2891
+        // *CompoundWordTokenFilter blindly adds the term length to the offset. That can go out
+        // of bounds with respect to the original text when an earlier stage made the word
+        // longer (here ü -> ue). In that case the filter behaves like WDF and preserves the
+        // modified offsets.
+        [Test]
+        public virtual void TestInvalidOffsets()
+        {
+            CharArraySet words = makeDictionary("fall");
+
+            NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
+            mapBuilder.Add("ü", "ue");
+            NormalizeCharMap charMap = mapBuilder.Build();
+
+            Analyzer mappingAnalyzer = new AnalyzerAnonymousInnerClassHelper(this, words, charMap);
+
+            AssertAnalyzesTo(
+                mappingAnalyzer,
+                "banküberfall",
+                new string[] { "bankueberfall", "fall" },
+                new int[] { 0, 0 },
+                new int[] { 12, 12 });
+        }
+
+        /// <summary>
+        /// Analyzer used by TestInvalidOffsets: a whitespace MockTokenizer followed by a
+        /// DictionaryCompoundWordTokenFilter, with the reader wrapped in a MappingCharFilter.
+        /// </summary>
+        private class AnalyzerAnonymousInnerClassHelper : Analyzer
+        {
+            private readonly TestCompoundWordTokenFilter outerInstance;
+
+            // Assigned once in the constructor, so readonly like outerInstance.
+            private readonly CharArraySet dict;
+            private readonly NormalizeCharMap normMap;
+
+            public AnalyzerAnonymousInnerClassHelper(TestCompoundWordTokenFilter outerInstance, CharArraySet dict, NormalizeCharMap normMap)
+            {
+                this.outerInstance = outerInstance;
+                this.dict = dict;
+                this.normMap = normMap;
+            }
+
+            // Builds the tokenizer + compound word filter chain for a field.
+            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+            {
+                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+                TokenFilter filter = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, dict);
+                return new TokenStreamComponents(tokenizer, filter);
+            }
+
+            // Wraps the incoming reader in a MappingCharFilter built from normMap.
+            public override TextReader InitReader(string fieldName, TextReader reader)
+            {
+                return new MappingCharFilter(normMap, reader);
+            }
+        }
+
+        /// <summary>
+        /// Pushes random strings through a dictionary-based analyzer and then through a
+        /// hyphenation-based analyzer.
+        /// </summary>
+        [Test]
+        public virtual void TestRandomStrings()
+        {
+            Analyzer dictionaryAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def"));
+            CheckRandomData(Random(), dictionaryAnalyzer, 1000 * RANDOM_MULTIPLIER);
+
+            // The hyphenation grammar is read from the da_UTF8.xml resource.
+            using (var grammar = this.GetType().getResourceAsStream("da_UTF8.xml"))
+            {
+                Analyzer hyphenationAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this, HyphenationCompoundWordTokenFilter.GetHyphenationTree(grammar));
+                CheckRandomData(Random(), hyphenationAnalyzer, 1000 * RANDOM_MULTIPLIER);
+            }
+        }
+
+        /// <summary>
+        /// Analyzer used by TestRandomStrings: a whitespace MockTokenizer followed by a
+        /// DictionaryCompoundWordTokenFilter over the supplied dictionary.
+        /// </summary>
+        private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+        {
+            private readonly TestCompoundWordTokenFilter outerInstance;
+
+            // Assigned once in the constructor, so readonly like outerInstance.
+            private readonly CharArraySet dict;
+
+            public AnalyzerAnonymousInnerClassHelper2(TestCompoundWordTokenFilter outerInstance, CharArraySet dict)
+            {
+                this.outerInstance = outerInstance;
+                this.dict = dict;
+            }
+
+            // Builds the tokenizer + compound word filter chain for a field.
+            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+            {
+                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+                return new TokenStreamComponents(tokenizer, new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, dict));
+            }
+        }
+
+        /// <summary>
+        /// Analyzer used by TestRandomStrings: a whitespace MockTokenizer followed by a
+        /// HyphenationCompoundWordTokenFilter over the supplied hyphenation tree.
+        /// </summary>
+        private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+        {
+            private readonly TestCompoundWordTokenFilter outerInstance;
+
+            // Assigned once in the constructor, so readonly like outerInstance.
+            private readonly HyphenationTree hyphenator;
+
+            public AnalyzerAnonymousInnerClassHelper3(TestCompoundWordTokenFilter outerInstance, HyphenationTree hyphenator)
+            {
+                this.outerInstance = outerInstance;
+                this.hyphenator = hyphenator;
+            }
+
+            // Builds the tokenizer + hyphenation filter chain for a field.
+            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+            {
+                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+                TokenFilter filter = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, hyphenator);
+                return new TokenStreamComponents(tokenizer, filter);
+            }
+        }
+
+        /// <summary>
+        /// Runs an empty term through the dictionary-based and the hyphenation-based
+        /// keyword analyzers and expects an empty term back.
+        /// </summary>
+        [Test]
+        public virtual void TestEmptyTerm()
+        {
+            Analyzer dictionaryAnalyzer = new AnalyzerAnonymousInnerClassHelper4(this, makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def"));
+            CheckOneTerm(dictionaryAnalyzer, "", "");
+
+            // The hyphenation grammar is read from the da_UTF8.xml resource.
+            using (var grammar = this.GetType().getResourceAsStream("da_UTF8.xml"))
+            {
+                Analyzer hyphenationAnalyzer = new AnalyzerAnonymousInnerClassHelper5(this, HyphenationCompoundWordTokenFilter.GetHyphenationTree(grammar));
+                CheckOneTerm(hyphenationAnalyzer, "", "");
+            }
+        }
+
+        /// <summary>
+        /// Analyzer used by TestEmptyTerm: a KeywordTokenizer followed by a
+        /// DictionaryCompoundWordTokenFilter over the supplied dictionary.
+        /// </summary>
+        private class AnalyzerAnonymousInnerClassHelper4 : Analyzer
+        {
+            private readonly TestCompoundWordTokenFilter outerInstance;
+
+            // Assigned once in the constructor, so readonly like outerInstance.
+            private readonly CharArraySet dict;
+
+            public AnalyzerAnonymousInnerClassHelper4(TestCompoundWordTokenFilter outerInstance, CharArraySet dict)
+            {
+                this.outerInstance = outerInstance;
+                this.dict = dict;
+            }
+
+            // Builds the tokenizer + compound word filter chain for a field.
+            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+            {
+                Tokenizer tokenizer = new KeywordTokenizer(reader);
+                return new TokenStreamComponents(tokenizer, new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, dict));
+            }
+        }
+
+        /// <summary>
+        /// Analyzer used by TestEmptyTerm: a KeywordTokenizer followed by a
+        /// HyphenationCompoundWordTokenFilter over the supplied hyphenation tree.
+        /// </summary>
+        private class AnalyzerAnonymousInnerClassHelper5 : Analyzer
+        {
+            private readonly TestCompoundWordTokenFilter outerInstance;
+
+            // Assigned once in the constructor, so readonly like outerInstance.
+            private readonly HyphenationTree hyphenator;
+
+            public AnalyzerAnonymousInnerClassHelper5(TestCompoundWordTokenFilter outerInstance, HyphenationTree hyphenator)
+            {
+                this.outerInstance = outerInstance;
+                this.hyphenator = hyphenator;
+            }
+
+            // Builds the tokenizer + hyphenation filter chain for a field.
+            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+            {
+                Tokenizer tokenizer = new KeywordTokenizer(reader);
+                TokenFilter filter = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, hyphenator);
+                return new TokenStreamComponents(tokenizer, filter);
+            }
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestDictionaryCompoundWordTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestDictionaryCompoundWordTokenFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestDictionaryCompoundWordTokenFilterFactory.cs
index dd219c0..4d469b1 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestDictionaryCompoundWordTokenFilterFactory.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestDictionaryCompoundWordTokenFilterFactory.cs
@@ -1,7 +1,10 @@
-namespace org.apache.lucene.analysis.compound
-{
+using Lucene.Net.Analysis.Util;
+using System.IO;
+using NUnit.Framework;
 
-	/*
+namespace Lucene.Net.Analysis.Compound
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,43 +21,37 @@
 	 * limitations under the License.
 	 */
 
+    /// <summary>
+    /// Simple tests to ensure the Dictionary compound filter factory is working.
+    /// </summary>
+    public class TestDictionaryCompoundWordTokenFilterFactory : BaseTokenStreamFactoryTestCase
+    {
+        /// <summary>
+        /// The factory-built filter must split "softball" into "soft" and "ball" while
+        /// keeping the original tokens.
+        /// </summary>
+        [Test]
+        public virtual void TestDecompounding()
+        {
+            TextReader text = new StringReader("I like to play softball");
+            TokenStream tokens = new MockTokenizer(text, MockTokenizer.WHITESPACE, false);
+            TokenStream decompounded = TokenFilterFactory("DictionaryCompoundWord", "dictionary", "compoundDictionary.txt").Create(tokens);
+
+            AssertTokenStreamContents(
+                decompounded,
+                new string[] { "I", "like", "to", "play", "softball", "soft", "ball" });
+        }
 
-	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
-
-	/// <summary>
-	/// Simple tests to ensure the Dictionary compound filter factory is working.
-	/// </summary>
-	public class TestDictionaryCompoundWordTokenFilterFactory : BaseTokenStreamFactoryTestCase
-	{
-	  /// <summary>
-	  /// Ensure the filter actually decompounds text.
-	  /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testDecompounding() throws Exception
-	  public virtual void testDecompounding()
-	  {
-		Reader reader = new StringReader("I like to play softball");
-		TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-		stream = tokenFilterFactory("DictionaryCompoundWord", "dictionary", "compoundDictionary.txt").create(stream);
-		assertTokenStreamContents(stream, new string[] {"I", "like", "to", "play", "softball", "soft", "ball"});
-	  }
-
-	  /// <summary>
-	  /// Test that bogus arguments result in exception </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testBogusArguments() throws Exception
-	  public virtual void testBogusArguments()
-	  {
-		try
-		{
-		  tokenFilterFactory("DictionaryCompoundWord", "dictionary", "compoundDictionary.txt", "bogusArg", "bogusValue");
-		  fail();
-		}
-		catch (System.ArgumentException expected)
-		{
-		  assertTrue(expected.Message.contains("Unknown parameters"));
-		}
-	  }
-	}
-
+        /// <summary>
+        /// An unrecognized factory argument must be rejected with an ArgumentException
+        /// whose message contains "Unknown parameters".
+        /// </summary>
+        [Test]
+        public virtual void TestBogusArguments()
+        {
+            try
+            {
+                TokenFilterFactory("DictionaryCompoundWord", "dictionary", "compoundDictionary.txt", "bogusArg", "bogusValue");
+            }
+            catch (System.ArgumentException e)
+            {
+                assertTrue(e.Message.Contains("Unknown parameters"));
+                return;
+            }
+
+            // Reaching this point means the factory accepted the bogus argument.
+            fail();
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestHyphenationCompoundWordTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestHyphenationCompoundWordTokenFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestHyphenationCompoundWordTokenFilterFactory.cs
index f195618..79bf1a5 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestHyphenationCompoundWordTokenFilterFactory.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestHyphenationCompoundWordTokenFilterFactory.cs
@@ -1,7 +1,10 @@
-namespace org.apache.lucene.analysis.compound
-{
+using Lucene.Net.Analysis.Util;
+using NUnit.Framework;
+using System.IO;
 
-	/*
+namespace Lucene.Net.Analysis.Compound
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,60 +21,53 @@
 	 * limitations under the License.
 	 */
 
+    /// <summary>
+    /// Simple tests to ensure the Hyphenation compound filter factory is working.
+    /// </summary>
+    public class TestHyphenationCompoundWordTokenFilterFactory : BaseTokenStreamFactoryTestCase
+    {
+        /// <summary>
+        /// Ensure the factory works with hyphenation grammar+dictionary: using default options.
+        /// </summary>
+        [Test]
+        public virtual void TestHyphenationWithDictionary()
+        {
+            TextReader reader = new StringReader("min veninde som er lidt af en læsehest");
+            TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+            stream = TokenFilterFactory("HyphenationCompoundWord", "hyphenator", "da_UTF8.xml", "dictionary", "da_compoundDictionary.txt").Create(stream);
 
-	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
-
-	/// <summary>
-	/// Simple tests to ensure the Hyphenation compound filter factory is working.
-	/// </summary>
-	public class TestHyphenationCompoundWordTokenFilterFactory : BaseTokenStreamFactoryTestCase
-	{
-	  /// <summary>
-	  /// Ensure the factory works with hyphenation grammar+dictionary: using default options.
-	  /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testHyphenationWithDictionary() throws Exception
-	  public virtual void testHyphenationWithDictionary()
-	  {
-		Reader reader = new StringReader("min veninde som er lidt af en læsehest");
-		TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-		stream = tokenFilterFactory("HyphenationCompoundWord", "hyphenator", "da_UTF8.xml", "dictionary", "da_compoundDictionary.txt").create(stream);
-
-		assertTokenStreamContents(stream, new string[] {"min", "veninde", "som", "er", "lidt", "af", "en", "læsehest", "læse", "hest"}, new int[] {1, 1, 1, 1, 1, 1, 1, 1, 0, 0});
-	  }
-
-	  /// <summary>
-	  /// Ensure the factory works with no dictionary: using hyphenation grammar only.
-	  /// Also change the min/max subword sizes from the default. When using no dictionary,
-	  /// its generally necessary to tweak these, or you get lots of expansions.
-	  /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testHyphenationOnly() throws Exception
-	  public virtual void testHyphenationOnly()
-	  {
-		Reader reader = new StringReader("basketballkurv");
-		TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-		stream = tokenFilterFactory("HyphenationCompoundWord", "hyphenator", "da_UTF8.xml", "minSubwordSize", "2", "maxSubwordSize", "4").create(stream);
+            AssertTokenStreamContents(stream, new string[] { "min", "veninde", "som", "er", "lidt", "af", "en", "læsehest", "læse", "hest" }, new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 });
+        }
 
-		assertTokenStreamContents(stream, new string[] {"basketballkurv", "ba", "sket", "bal", "ball", "kurv"});
-	  }
+        /// <summary>
+        /// Ensure the factory works with no dictionary: using hyphenation grammar only.
+        /// Also change the min/max subword sizes from the default. When using no dictionary,
+        /// it's generally necessary to tweak these, or you get lots of expansions.
+        /// </summary>
+        [Test]
+        public virtual void TestHyphenationOnly()
+        {
+            TextReader reader = new StringReader("basketballkurv");
+            TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+            stream = TokenFilterFactory("HyphenationCompoundWord", "hyphenator", "da_UTF8.xml", "minSubwordSize", "2", "maxSubwordSize", "4").Create(stream);
 
-	  /// <summary>
-	  /// Test that bogus arguments result in exception </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testBogusArguments() throws Exception
-	  public virtual void testBogusArguments()
-	  {
-		try
-		{
-		  tokenFilterFactory("HyphenationCompoundWord", "hyphenator", "da_UTF8.xml", "bogusArg", "bogusValue");
-		  fail();
-		}
-		catch (System.ArgumentException expected)
-		{
-		  assertTrue(expected.Message.contains("Unknown parameters"));
-		}
-	  }
-	}
+            AssertTokenStreamContents(stream, new string[] { "basketballkurv", "ba", "sket", "bal", "ball", "kurv" });
+        }
 
+        /// <summary>
+        /// An unrecognized factory argument must be rejected with an ArgumentException
+        /// whose message contains "Unknown parameters".
+        /// </summary>
+        [Test]
+        public virtual void TestBogusArguments()
+        {
+            try
+            {
+                TokenFilterFactory("HyphenationCompoundWord", "hyphenator", "da_UTF8.xml", "bogusArg", "bogusValue");
+            }
+            catch (System.ArgumentException e)
+            {
+                assertTrue(e.Message.Contains("Unknown parameters"));
+                return;
+            }
+
+            // Reaching this point means the factory accepted the bogus argument.
+            fail();
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/compoundDictionary.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/compoundDictionary.txt b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/compoundDictionary.txt
new file mode 100644
index 0000000..f4977b5
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/compoundDictionary.txt
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# A set of words for testing the DictionaryCompound factory
+soft
+ball
+team

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/da_UTF8.xml
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/da_UTF8.xml b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/da_UTF8.xml
new file mode 100644
index 0000000..2c8d203
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/da_UTF8.xml
@@ -0,0 +1,1208 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE hyphenation-info SYSTEM "hyphenation.dtd">
+<!--
+  Copyright 1999-2004 The Apache Software Foundation
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!--
+This file contains the hyphenation patterns for Danish.
+Adapted from dkhyph.tex, dkcommon.tex and dkspecial.tex
+originally created by Frank Jensen (fj@iesd.auc.dk).
+FOP adaptation by Carlos Villegas (cav@uniscope.co.jp)
+-->
+<hyphenation-info>
+
+<hyphen-char value="-"/>
+<hyphen-min before="2" after="2"/>
+
+<classes>
+aA
+bB
+cC
+dD
+eE
+fF
+gG
+hH
+iI
+jJ
+kK
+lL
+mM
+nN
+oO
+pP
+qQ
+rR
+sS
+tT
+uU
+vV
+wW
+xX
+yY
+zZ
+æÆ
+øØ
+åÅ
+</classes>
+<patterns>
+<!-- dkcommon -->
+.ae3
+.an3k
+.an1s
+.be5la
+.be1t
+.bi4tr
+.der3i
+.diagno5
+.her3
+.hoved3
+.ne4t5
+.om1
+.ove4
+.po1
+.til3
+.yd5r
+ab5le
+3abst
+a3c
+ade5la
+5adg
+a1e
+5afg
+5a4f1l
+af3r
+af4ri
+5afs
+a4gef
+a4gi
+ag5in
+ag5si
+3agti
+a4gy
+a3h
+ais5t
+a3j
+a5ka
+a3ke
+a5kr
+aku5
+a3la
+a1le
+a1li
+al3k
+4alkv
+a1lo
+al5si
+a3lu
+a1ly
+am4pa
+3analy
+an4k5r
+a3nu
+3anv
+a5o
+a5pe
+a3pi
+a5po
+a1ra
+ar5af
+1arb
+a1re
+5arg
+a1ri
+a3ro
+a3sa
+a3sc
+a1si
+a3sk
+a3so
+3a3sp
+a3ste
+a3sti
+a1ta1
+a1te
+a1ti
+a4t5in
+a1to
+ato5v
+a5tr
+a1tu
+a5va
+a1ve
+a5z
+1ba
+ba4ti
+4bd
+1be
+be1k
+be3ro
+be5ru
+be1s4
+be1tr
+1bi
+bi5sk
+b1j
+4b1n
+1bo
+bo4gr
+bo3ra
+bo5re
+1br4
+4bs
+bs5k
+b3so
+b1st
+b5t
+3bu
+bu4s5tr
+b5w
+1by
+by5s
+4c1c
+1ce
+ce5ro
+3ch
+4ch.
+ci4o
+ck3
+5cy
+3da
+4d3af
+d5anta
+da4s
+d1b
+d1d4
+1de
+de5d
+4de4lem
+der5eri
+de4rig
+de5sk
+d1f
+d1g
+d3h
+1di
+di1e
+di5l
+d3j
+d1k
+d1l
+d1m
+4d1n
+3do
+4dop
+d5ov
+d1p
+4drett
+5d4reve
+3drif
+3driv
+d5ros
+d5ru
+ds5an
+ds5in
+d1ski
+d4sm
+d4su
+dsu5l
+ds5vi
+d3ta
+d1te
+dt5o
+d5tr
+dt5u
+1du
+dub5
+d1v
+3dy
+e5ad
+e3af
+e5ag
+e3ak
+e1al
+ea4la
+e3an
+e5ap
+e3at
+e3bl
+ebs3
+e1ci
+ed5ar
+edde4
+eddel5
+e4do
+ed5ra
+ed3re
+ed3rin
+ed4str
+e3e
+3eff
+e3fr
+3eft
+e3gu
+e1h
+e3in
+ei5s
+e3je
+e4j5el
+e1ka
+e3ke
+e3kl
+4e1ko
+e5kr
+ek5sa
+3eksem
+3eksp
+e3ku
+e1kv
+e5ky
+e3lad
+el3ak
+el3ar
+e1las
+e3le
+e4lek
+3elem
+e1li
+5elim
+e3lo
+el5sa
+e5lu
+e3ly
+e4mad
+em4p5le
+em1s
+en5ak
+e4nan
+4enn
+e4no
+en3so
+e5nu
+e5ol
+e3op
+e1or
+e3ov
+epi3
+e1pr
+e3ra
+er3af
+e4rag
+e4rak
+e1re
+e4ref
+er5ege
+5erhv
+e1ri
+e4rib
+er1k
+ero5d
+er5ov
+er3s
+er5tr
+e3rum
+er5un
+e5ry
+e1ta
+e1te
+etek4s
+e1ti
+e3tj
+e1to
+e3tr
+e3tu
+e1ty
+e3um
+e3un
+3eur
+e1va
+e3ve
+e4v3erf
+e1vi
+e5x
+1fa
+fa4ce
+fags3
+f1b
+f1d
+1fe
+fej4
+fejl1
+f1f
+f1g
+f1h
+1fi
+f1k
+3fl
+1fo
+for1en
+fo4ri
+f1p
+f1s4
+4ft
+f3ta
+f1te
+f1ti
+f5to
+f5tvi
+1fu
+f1v
+3fy
+1ga
+g3art
+g1b
+g1d
+1ge
+4g5enden
+ger3in
+ge3s
+g3f
+g1g
+g1h
+1gi
+gi4b
+gi3st
+5gj
+g3k
+g1l
+g1m
+3go
+4g5om
+g5ov
+g3p
+1gr
+gs1a
+gsde4len
+g4se
+gsha4
+g5sla
+gs3or
+gs1p
+g5s4tide
+g4str
+gs1v
+g3ta
+g1te
+g1ti
+g5to
+g3tr
+gt4s
+g3ud
+gun5
+g3v
+1gy
+g5yd
+4ha.
+heds3
+he5s
+4het
+hi4e
+hi4n5
+hi3s
+ho5ko
+ho5ve
+4h3t
+hun4
+hund3
+hvo4
+i1a
+i3b
+i4ble
+i1c
+i3dr
+ids5k
+i1el
+i1en
+i3er
+i3et.
+if3r
+i3gu
+i3h
+i5i
+i5j
+i1ka
+i1ke
+ik1l
+i5ko
+ik3re
+ik5ri
+iks5t
+ik4tu
+i3ku
+ik3v
+i3lag
+il3eg
+il5ej
+il5el
+i3li
+i4l5id
+il3k
+i1lo
+il5u
+i3mu
+ind3t
+5inf
+ings1
+in3s
+in4sv
+inter1
+i3nu
+i3od
+i3og
+i5ok
+i3ol
+ion4
+ions1
+i5o5r
+i3ot
+i5pi
+i3pli
+i5pr
+i3re
+i3ri
+ir5t
+i3sc
+i3si
+i4sm
+is3p
+i1ster
+i3sti
+i5sua
+i1ta
+i1te
+i1ti
+i3to
+i3tr
+it5re.
+i1tu
+i3ty
+i1u
+i1va
+i1ve
+i1vi
+j3ag
+jde4rer
+jds1
+jek4to
+4j5en.
+j5k
+j3le
+j3li
+jlmeld5
+jlmel4di
+j3r
+jre5
+ju3s
+5kap
+k5au
+5kav
+k5b
+kel5s
+ke3sk
+ke5st
+ke4t5a
+k3h
+ki3e
+ki3st
+k1k
+k5lak
+k1le
+3klu
+k4ny
+5kod
+1kon
+ko3ra
+3kort
+ko3v
+1kra
+5kry
+ks3an
+k1si
+ks3k
+ks1p
+k3ste
+k5stu
+ks5v
+k1t
+k4tar
+k4terh
+kti4e
+kt5re
+kt5s
+3kur
+1kus
+3kut
+k4vo
+k4vu
+5lab
+lad3r
+5lagd
+la4g3r
+5lam
+1lat
+l1b
+ldiagnos5
+l3dr
+ld3st
+1le.
+5led
+4lele
+le4mo
+3len
+1ler
+1les
+4leu
+l1f
+lfin4
+lfind5
+l1go1
+l3h
+li4ga
+4l5ins
+4l3int
+li5o
+l3j
+l1ke
+l1ko
+l3ky
+l1l
+l5mu
+lo4du
+l3op
+4l5or
+3lov
+4l3p
+l4ps
+l3r
+4ls
+lses1
+ls5in
+l5sj
+l1ta
+l4taf
+l1te
+l4t5erf
+l3ti
+lt3o
+l3tr
+l3tu
+lu5l
+l3ve
+l3vi
+1ma
+m1b
+m3d
+1me
+4m5ej
+m3f
+m1g
+m3h
+1mi
+mi3k
+m5ing
+mi4o
+mi5sty
+m3k
+m1l
+m1m
+mmen5
+m1n
+3mo
+mo4da
+4mop
+4m5ov
+m1pe
+m3pi
+m3pl
+m1po
+m3pr
+m1r
+mse5s
+ms5in
+m5sk
+ms3p
+m3ste
+ms5v
+m3ta
+m3te
+m3ti
+m3tr
+m1ud
+1mul
+mu1li
+3my
+3na
+4nak
+1nal
+n1b
+n1c
+4nd
+n3dr
+nd5si
+nd5sk
+nd5sp
+1ne
+ne5a
+ne4da
+nemen4
+nement5e
+neo4
+n3erk
+n5erl
+ne5sl
+ne5st
+n1f
+n4go
+4n1h
+1ni
+4nim
+ni5o
+ni3st
+n1ke
+n1ko
+n3kr
+n3ku
+n5kv
+4n1l
+n1m
+n1n
+1no
+n3ord
+n5p
+n3r
+4ns
+n3si
+n1sku
+ns3po
+n1sta
+n5sti
+n1ta
+nta4le
+n1te
+n1ti
+ntiali4
+n3to
+n1tr
+nt4s5t
+nt4su
+n3tu
+n3ty
+4n1v
+3ny
+n3z
+o3a
+o4as
+ob3li
+o1c
+o4din
+od5ri
+od5s
+od5un
+o1e
+of5r
+o4gek
+o4gel
+o4g5o
+og5re
+og5sk
+o5h
+o5in
+oi6s5e
+o1j
+o3ka
+o1ke
+o3ku
+o3la
+o3le
+o1li
+o1lo
+o3lu
+o5ly
+1omr
+on3k
+ook5
+o3or
+o5ov
+o3pi
+op3l
+op3r
+op3s
+3opta
+4or.
+or1an
+3ordn
+ord5s
+o3re.
+o3reg
+o3rek
+o3rer
+o3re3s
+o3ret
+o3ri
+3orient
+or5im
+o4r5in
+or3k
+or5o
+or3sl
+or3st
+o3si
+o3so
+o3t
+o1te
+o5un
+ov4s
+3pa
+pa5gh
+p5anl
+p3d
+4pec
+3pen
+1per
+pe1ra
+pe5s
+pe3u
+p3f
+4p5h
+1pla
+p4lan
+4ple.
+4pler
+4ples
+p3m
+p3n
+5pok
+4po3re
+3pot
+4p5p4
+p4ro
+1proc
+p3sk
+p5so
+ps4p
+p3st
+p1t
+1pu
+pu5b
+p5ule
+p5v
+5py3
+qu4
+4raf
+ra5is
+4rarb
+r1b
+r4d5ar
+r3dr
+rd4s3
+4reks
+1rel
+re5la
+r5enss
+5rese
+re5spo
+4ress
+re3st
+re5s4u
+5rett
+r1f
+r1gu
+r1h
+ri1e
+ri5la
+4rimo
+r4ing
+ringse4
+ringso4r
+4rinp
+4rint
+r3ka
+r1ke
+r1ki
+rk3so
+r3ku
+r1l
+rmo4
+r5mu
+r1n
+ro1b
+ro3p
+r3or
+r3p
+r1r
+rre5s
+rro4n5
+r1sa
+r1si
+r5skr
+r4sk5v
+rs4n
+r3sp
+r5stu
+r5su
+r3sv
+r5tal
+r1te
+r4teli
+r1ti
+r3to
+r4t5or
+rt5rat
+rt3re
+r5tri
+r5tro
+rt3s
+r5ty
+r3ud
+run4da
+5rut
+r3va
+r1ve
+r3vi
+ry4s
+s3af
+1sam
+sa4ma
+s3ap
+s1ar
+1sat
+4s1b
+s1d
+sdy4
+1se
+s4ed
+5s4er
+se4se
+s1f
+4s1g4
+4s3h
+si4bl
+1sig
+s5int
+5sis
+5sit
+5siu
+s5ju
+4sk.
+1skab
+1ske
+s3kl
+sk5s4
+5sky
+s1le
+s1li
+slo3
+5slu
+s5ly
+s1m
+s4my
+4snin
+s4nit
+so5k
+5sol
+5som.
+3somm
+s5oms
+5somt
+3son
+4s1op
+sp4
+3spec
+4sper
+3s4pi
+s1pl
+3sprog.
+s5r4
+s1s4
+4st.
+5s4tam
+1stan
+st5as
+3stat
+1stav
+1ste.
+1sted
+3stel
+5stemo
+1sten
+5step
+3ster.
+3stes
+5stet
+5stj
+3sto
+st5om
+1str
+s1ud
+3sul
+s3un
+3sur
+s3ve
+3s4y
+1sy1s
+5ta.
+1tag
+tands3
+4tanv
+4tb
+tede4l
+teds5
+3teg
+5tekn
+teo1
+5term
+te5ro
+4t1f
+6t3g
+t1h
+tialis5t
+3tid
+ti4en
+ti3st
+4t3k
+4t1l
+tli4s5
+t1m
+t1n
+to5ra
+to1re
+to1ri
+tor4m
+4t3p
+t4ra
+4tres
+tro5v
+1try
+4ts
+t3si
+ts4pa
+ts5pr
+t3st
+ts5ul
+4t1t
+t5uds
+5tur
+t5ve
+1typ
+u1a
+5udl
+ud5r
+ud3s
+3udv
+u1e
+ue4t5
+uge4ri
+ugs3
+u5gu
+u3i
+u5kl
+uk4ta
+uk4tr
+u1la
+u1le
+u5ly
+u5pe
+up5l
+u5q
+u3ra
+u3re
+u4r3eg
+u1rer
+u3ro
+us5a
+u3si
+u5ska
+u5so
+us5v
+u1te
+u1ti
+u1to
+ut5r
+ut5s4
+5u5v
+va5d
+3varm
+1ved
+ve4l5e
+ve4reg
+ve3s
+5vet
+v5h
+vi4l3in
+1vis
+v5j
+v5k
+vl4
+v3le
+v5li
+vls1
+1vo
+4v5om
+v5p
+v5re
+v3st
+v5su
+v5t
+3vu
+y3a
+y5dr
+y3e
+y3ke
+y5ki
+yk3li
+y3ko
+yk4s5
+y3kv
+y5li
+y5lo
+y5mu
+yns5
+y5o
+y1pe
+y3pi
+y3re
+yr3ek
+y3ri
+y3si
+y3ti
+y5t3r
+y5ve
+zi5o
+<!-- dkspecial -->
+.så3
+.ær5i
+.øv3r
+a3tø
+a5væ
+brød3
+5bæ
+5drøv
+dstå4
+3dæ
+3dø
+e3læ
+e3lø
+e3rø
+er5øn
+e5tæ
+e5tø
+e1væ
+e3æ
+e5å
+3fæ
+3fø
+fø4r5en
+giø4
+g4sø
+g5så
+3gæ
+3gø1
+3gå
+i5tæ
+i3ø
+3kø
+3kå
+lingeniø4
+l3væ
+5løs
+m5tå
+1mæ
+3mø
+3må
+n3kæ
+n5tæ
+3næ
+4n5æb
+5nø
+o5læ
+or3ø
+o5å
+5præ
+5pæd
+på3
+r5kæ
+r5tæ
+r5tø
+r3væ
+r5æl
+4røn
+5rør
+3råd
+r5år
+s4kå
+3slå
+s4næ
+5stø
+1stå
+1sæ
+4s5æn
+1sø
+s5øk
+så4r5
+ti4ø
+3træk.
+t4sø
+t5så
+t3væ
+u3læ
+3værd
+1værk
+5vå
+y5væ
+æb3l
+æ3c
+æ3e
+æg5a
+æ4gek
+æ4g5r
+ægs5
+æ5i
+æ5kv
+ælle4
+æn1dr
+æ5o
+æ1re
+ær4g5r
+æ3ri
+ær4ma
+ær4mo
+ær5s
+æ5si
+æ3so
+æ3ste
+æ3ve
+øde5
+ø3e
+ø1je
+ø3ke
+ø3le
+øms5
+øn3st
+øn4t3
+ø1re
+ø3ri
+ørne3
+ør5o
+ø1ve
+å1d
+å1e
+å5h
+å3l
+å3re
+års5t
+å5sk
+å3t
+</patterns>
+</hyphenation-info>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/da_compoundDictionary.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/da_compoundDictionary.txt b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/da_compoundDictionary.txt
new file mode 100644
index 0000000..9a14f40
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/da_compoundDictionary.txt
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# A set of words for testing the HyphenationCompound factory,
+# in conjunction with the Danish hyphenation grammar.
+læse
+hest

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
index 1b641b7..504ec5f 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
+++ b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
@@ -84,6 +84,9 @@
     <Compile Include="Analysis\CommonGrams\CommonGramsFilterTest.cs" />
     <Compile Include="Analysis\CommonGrams\TestCommonGramsFilterFactory.cs" />
     <Compile Include="Analysis\CommonGrams\TestCommonGramsQueryFilterFactory.cs" />
+    <Compile Include="Analysis\Compound\TestCompoundWordTokenFilter.cs" />
+    <Compile Include="Analysis\Compound\TestDictionaryCompoundWordTokenFilterFactory.cs" />
+    <Compile Include="Analysis\Compound\TestHyphenationCompoundWordTokenFilterFactory.cs" />
     <Compile Include="Analysis\Core\TestAnalyzers.cs" />
     <Compile Include="Analysis\Core\TestBugInSomething.cs" />
     <Compile Include="Analysis\Core\TestClassicAnalyzer.cs" />
@@ -476,7 +479,11 @@
     <EmbeddedResource Include="Analysis\No\nn_light.txt" />
     <EmbeddedResource Include="Analysis\No\nn_minimal.txt" />
   </ItemGroup>
-  <ItemGroup />
+  <ItemGroup>
+    <EmbeddedResource Include="Analysis\Compound\compoundDictionary.txt" />
+    <EmbeddedResource Include="Analysis\Compound\da_UTF8.xml" />
+    <EmbeddedResource Include="Analysis\Compound\da_compoundDictionary.txt" />
+  </ItemGroup>
   <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
   <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
        Other similar extension points exist, see Microsoft.Common.targets.


Mime
View raw message