lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [21/27] lucenenet git commit: adding converted analysis common tests
Date Thu, 10 Dec 2015 18:39:10 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestRandomChains.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestRandomChains.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestRandomChains.cs
new file mode 100644
index 0000000..a1e8438
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestRandomChains.cs
@@ -0,0 +1,1587 @@
+using System;
+using System.Diagnostics;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.core
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using NormalizeCharMap = org.apache.lucene.analysis.charfilter.NormalizeCharMap;
+	using CJKBigramFilter = org.apache.lucene.analysis.cjk.CJKBigramFilter;
+	using CommonGramsFilter = org.apache.lucene.analysis.commongrams.CommonGramsFilter;
+	using CommonGramsQueryFilter = org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter;
+	using HyphenationCompoundWordTokenFilter = org.apache.lucene.analysis.compound.HyphenationCompoundWordTokenFilter;
+	using TestCompoundWordTokenFilter = org.apache.lucene.analysis.compound.TestCompoundWordTokenFilter;
+	using HyphenationTree = org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
+	using Dictionary = org.apache.lucene.analysis.hunspell.Dictionary;
+	using TestHunspellStemFilter = org.apache.lucene.analysis.hunspell.TestHunspellStemFilter;
+	using HyphenatedWordsFilter = org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilter;
+	using LimitTokenCountFilter = org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
+	using LimitTokenPositionFilter = org.apache.lucene.analysis.miscellaneous.LimitTokenPositionFilter;
+	using StemmerOverrideFilter = org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
+	using StemmerOverrideMap = org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
+	using WordDelimiterFilter = org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
+	using EdgeNGramTokenFilter = org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
+	using Lucene43EdgeNGramTokenizer = org.apache.lucene.analysis.ngram.Lucene43EdgeNGramTokenizer;
+	using PathHierarchyTokenizer = org.apache.lucene.analysis.path.PathHierarchyTokenizer;
+	using ReversePathHierarchyTokenizer = org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
+	using IdentityEncoder = org.apache.lucene.analysis.payloads.IdentityEncoder;
+	using PayloadEncoder = org.apache.lucene.analysis.payloads.PayloadEncoder;
+	using TestSnowball = org.apache.lucene.analysis.snowball.TestSnowball;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using SynonymMap = org.apache.lucene.analysis.synonym.SynonymMap;
+	using CharArrayMap = org.apache.lucene.analysis.util.CharArrayMap;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using WikipediaTokenizer = org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
+	using AttributeSource = org.apache.lucene.util.AttributeSource;
+	using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
+	using CharsRef = org.apache.lucene.util.CharsRef;
+	using Rethrow = org.apache.lucene.util.Rethrow;
+	using TestUtil = org.apache.lucene.util.TestUtil;
+	using Version = org.apache.lucene.util.Version;
+	using CharacterRunAutomaton = org.apache.lucene.util.automaton.CharacterRunAutomaton;
+	using AfterClass = org.junit.AfterClass;
+	using BeforeClass = org.junit.BeforeClass;
+	using SnowballProgram = org.tartarus.snowball.SnowballProgram;
+	using InputSource = org.xml.sax.InputSource;
+
+	/// <summary>
+	/// Tests random analysis chains. </summary>
+	public class TestRandomChains : BaseTokenStreamTestCase
+	{
+
+	  // Constructors collected by beforeClass() for each kind of analysis component.
+	  // The Java originals (quoted per field) used wildcard generics, which C# lacks;
+	  // System.Reflection.ConstructorInfo is the type Type.GetConstructors() returns.
+//ORIGINAL LINE: static java.util.List<Constructor<? extends org.apache.lucene.analysis.Tokenizer>> tokenizers;
+	  internal static IList<System.Reflection.ConstructorInfo> tokenizers;
+//ORIGINAL LINE: static java.util.List<Constructor<? extends org.apache.lucene.analysis.TokenFilter>> tokenfilters;
+	  internal static IList<System.Reflection.ConstructorInfo> tokenfilters;
+//ORIGINAL LINE: static java.util.List<Constructor<? extends org.apache.lucene.analysis.CharFilter>> charfilters;
+	  internal static IList<System.Reflection.ConstructorInfo> charfilters;
+
+	  private interface Predicate<T> // Single-method test over a value of type T; in this file it is used with T = object[] (constructor argument arrays).
+	  {
+		bool apply(T o); // Result of the test for o; the implementations visible here return true when the arguments count as broken.
+	  }
+
+	  /// <summary>
+	  /// Predicate that accepts every argument array; a constructor mapped to it is
+	  /// treated as broken no matter which arguments are chosen.
+	  /// </summary>
+	  private static readonly Predicate<object[]> ALWAYS = new PredicateAnonymousInnerClassHelper();
+
+	  /// <summary>
+	  /// Implementation behind <see cref="ALWAYS"/>: ignores its input and returns true.
+	  /// </summary>
+	  private class PredicateAnonymousInnerClassHelper : Predicate<object[]>
+	  {
+		  public PredicateAnonymousInnerClassHelper()
+		  {
+		  }
+
+		  /// <param name="args">Constructor arguments; not inspected.</param>
+		  /// <returns>Always <c>true</c>.</returns>
+		  public virtual bool apply(object[] args)
+		  {
+			return true;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Constructors that are known to be broken, each mapped to a predicate over the
+	  /// constructor's argument array; <see cref="ALWAYS"/> marks a constructor as broken
+	  /// for every argument array.
+	  /// </summary>
+//ORIGINAL LINE: private static final java.util.Map<Constructor<?>,Predicate<Object[]>> brokenConstructors = new java.util.HashMap<>();
+	  private static readonly IDictionary<System.Reflection.ConstructorInfo, Predicate<object[]>> brokenConstructors = new Dictionary<System.Reflection.ConstructorInfo, Predicate<object[]>>();
+
+	  /// <summary>
+	  /// Fills brokenConstructors and brokenOffsetsConstructors and builds the sets of
+	  /// parameter types allowed for tokenizer, token filter and char filter constructors.
+	  /// </summary>
+	  static TestRandomChains()
+	  {
+		try
+		{
+		  // Type.GetConstructor expects the parameter types as one Type[] argument.
+		  // It returns null when nothing matches; a null key makes the dictionary
+		  // throw, which the catch below wraps.
+		  brokenConstructors[typeof(LimitTokenCountFilter).GetConstructor(new Type[] { typeof(TokenStream), typeof(int) })] = ALWAYS;
+		  brokenConstructors[typeof(LimitTokenCountFilter).GetConstructor(new Type[] { typeof(TokenStream), typeof(int), typeof(bool) })] = new PredicateAnonymousInnerClassHelper2();
+		  brokenConstructors[typeof(LimitTokenPositionFilter).GetConstructor(new Type[] { typeof(TokenStream), typeof(int) })] = ALWAYS;
+		  brokenConstructors[typeof(LimitTokenPositionFilter).GetConstructor(new Type[] { typeof(TokenStream), typeof(int), typeof(bool) })] = new PredicateAnonymousInnerClassHelper3();
+		  foreach (Type c in new Type[] { typeof(CachingTokenFilter), typeof(CrankyTokenFilter), typeof(ValidatingTokenFilter) })
+			  // NOTE(review): the comments below were moved here by the converter; they
+			  // presumably annotated individual entries of the list above - confirm against the Java source.
+			  // TODO: can we promote some of these to be only
+			  // offsets offenders?
+			  // doesn't actual reset itself!
+			  // Not broken, simulates brokenness:
+			  // Not broken: we forcefully add this, so we shouldn't
+			  // also randomly pick it:
+		  {
+			foreach (var ctor in c.GetConstructors())
+			{
+			  brokenConstructors[ctor] = ALWAYS;
+			}
+		  }
+		}
+		catch (Exception e)
+		{
+		  // Exception has no constructor taking only an Exception: pass the message
+		  // and keep the original as InnerException.
+		  throw new Exception(e.Message, e);
+		}
+		try
+		{
+		  foreach (Type c in new Type[] { typeof(ReversePathHierarchyTokenizer), typeof(PathHierarchyTokenizer), typeof(WikipediaTokenizer), typeof(CJKBigramFilter), typeof(HyphenatedWordsFilter), typeof(CommonGramsFilter), typeof(CommonGramsQueryFilter), typeof(WordDelimiterFilter) })
+			  // NOTE(review): converter-displaced comments, presumably one per list entry - confirm against the Java source.
+			  // TODO: it seems to mess up offsets!?
+			  // TODO: doesn't handle graph inputs
+			  // TODO: doesn't handle graph inputs (or even look at positionIncrement)
+			  // TODO: LUCENE-4983
+			  // TODO: doesn't handle graph inputs
+			  // TODO: probably doesnt handle graph inputs, too afraid to try
+		  {
+			foreach (var ctor in c.GetConstructors())
+			{
+			  brokenOffsetsConstructors[ctor] = ALWAYS;
+			}
+		  }
+		}
+		catch (Exception e)
+		{
+		  throw new Exception(e.Message, e);
+		}
+		// NOTE(review): the three allowed*Args sets below still use Java collection
+		// calls (Collections.newSetFromMap, IdentityHashMap, addAll), and no C#
+		// declaration of these fields is visible in this part of the file - port
+		// them together with the declarations.
+		allowedTokenizerArgs = Collections.newSetFromMap(new IdentityHashMap<Type, bool?>());
+		allowedTokenizerArgs.addAll(argProducers.Keys);
+		allowedTokenizerArgs.Add(typeof(Reader));
+		allowedTokenizerArgs.Add(typeof(AttributeSource.AttributeFactory));
+		allowedTokenizerArgs.Add(typeof(AttributeSource));
+
+		allowedTokenFilterArgs = Collections.newSetFromMap(new IdentityHashMap<Type, bool?>());
+		allowedTokenFilterArgs.addAll(argProducers.Keys);
+		allowedTokenFilterArgs.Add(typeof(TokenStream));
+		// TODO: fix this one, thats broken:
+		allowedTokenFilterArgs.Add(typeof(CommonGramsFilter));
+
+		allowedCharFilterArgs = Collections.newSetFromMap(new IdentityHashMap<Type, bool?>());
+		allowedCharFilterArgs.addAll(argProducers.Keys);
+		allowedCharFilterArgs.Add(typeof(Reader));
+	  }
+
+	  /// <summary>
+	  /// Predicate registered for the three-argument LimitTokenCountFilter constructor:
+	  /// the arguments count as broken when the third one is false.
+	  /// </summary>
+	  private class PredicateAnonymousInnerClassHelper2 : Predicate<object[]>
+	  {
+		  public PredicateAnonymousInnerClassHelper2()
+		  {
+		  }
+
+		  /// <param name="args">The three constructor arguments; args[2] must be a boxed bool.</param>
+		  /// <returns><c>true</c> when args[2] is false.</returns>
+		  public virtual bool apply(object[] args)
+		  {
+			Debug.Assert(args.Length == 3);
+			// Unbox to bool rather than bool?: negating a bool? yields bool?, which
+			// cannot be returned from a method declared to return bool.
+			return !((bool) args[2]); // args are broken if consumeAllTokens is false
+		  }
+	  }
+
+	  /// <summary>
+	  /// Predicate registered for the three-argument LimitTokenPositionFilter constructor:
+	  /// the arguments count as broken when the third one is false.
+	  /// </summary>
+	  private class PredicateAnonymousInnerClassHelper3 : Predicate<object[]>
+	  {
+		  public PredicateAnonymousInnerClassHelper3()
+		  {
+		  }
+
+		  /// <param name="args">The three constructor arguments; args[2] must be a boxed bool.</param>
+		  /// <returns><c>true</c> when args[2] is false.</returns>
+		  public virtual bool apply(object[] args)
+		  {
+			Debug.Assert(args.Length == 3);
+			// Unbox to bool rather than bool?: negating a bool? yields bool?, which
+			// cannot be returned from a method declared to return bool.
+			return !((bool) args[2]); // args are broken if consumeAllTokens is false
+		  }
+	  }
+
+	  // TODO: also fix these and remove (maybe):
+	  // Classes/options that don't produce consistent graph offsets:
+	  // Keyed by System.Reflection.ConstructorInfo (what Type.GetConstructors() returns),
+	  // because the Java wildcard type Constructor<?> has no C# spelling.
+//ORIGINAL LINE: private static final java.util.Map<Constructor<?>,Predicate<Object[]>> brokenOffsetsConstructors = new java.util.HashMap<>();
+	  private static readonly IDictionary<System.Reflection.ConstructorInfo, Predicate<object[]>> brokenOffsetsConstructors = new Dictionary<System.Reflection.ConstructorInfo, Predicate<object[]>>();
+
+	  /// <summary>
+	  /// Scans the analysis classes and fills tokenizers, tokenfilters and charfilters
+	  /// with the constructors that are eligible for random chains, then sorts each list.
+	  /// </summary>
+	  // NOTE(review): this method still relies on Java reflection members (Class, Modifier,
+	  // Synthetic, isAnnotationPresent, toGenericString, containsAll, ...) and on the Java
+	  // diamond "new List<>()"; those need a .NET port before it can compile.
+//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
+	  public static void beforeClass()
+	  {
+		IList<Type> analysisClasses = getClassesForPackage("org.apache.lucene.analysis");
+		tokenizers = new List<>();
+		tokenfilters = new List<>();
+		charfilters = new List<>();
+		foreach (Class c in analysisClasses)
+		{
+//ORIGINAL LINE: final int modifiers = c.getModifiers();
+		  int modifiers = c.Modifiers;
+		  if (Modifier.isAbstract(modifiers) || !Modifier.isPublic(modifiers) || c.Synthetic || c.AnonymousClass || c.MemberClass || c.Interface || c.isAnnotationPresent(typeof(Deprecated)) || !(c.IsSubclassOf(typeof(Tokenizer)) || c.IsSubclassOf(typeof(TokenFilter)) || c.IsSubclassOf(typeof(CharFilter))))
+		  {
+			// don't waste time with abstract classes or deprecated known-buggy ones
+			continue;
+		  }
+
+//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
+//ORIGINAL LINE: for (final Constructor<?> ctor : c.getConstructors())
+		  foreach (Constructor<?> ctor in c.Constructors)
+		  {
+			// The dictionary indexer throws KeyNotFoundException for a key that is not
+			// present (Java's Map.get returned null), so look the constructor up with
+			// TryGetValue; most constructors are not registered as broken.
+			Predicate<object[]> brokenPredicate;
+			bool alwaysBroken = brokenConstructors.TryGetValue(ctor, out brokenPredicate) && brokenPredicate == ALWAYS;
+			// don't test synthetic or deprecated ctors, they likely have known bugs:
+			if (ctor.Synthetic || ctor.isAnnotationPresent(typeof(Deprecated)) || alwaysBroken)
+			{
+			  continue;
+			}
+			if (c.IsSubclassOf(typeof(Tokenizer)))
+			{
+//JAVA TO C# CONVERTER TODO TASK: There is no .NET equivalent to the java.util.Collection 'containsAll' method:
+			  assertTrue(ctor.toGenericString() + " has unsupported parameter types", allowedTokenizerArgs.containsAll(Arrays.asList(ctor.ParameterTypes)));
+			  tokenizers.Add(castConstructor(typeof(Tokenizer), ctor));
+			}
+			else if (c.IsSubclassOf(typeof(TokenFilter)))
+			{
+//JAVA TO C# CONVERTER TODO TASK: There is no .NET equivalent to the java.util.Collection 'containsAll' method:
+			  assertTrue(ctor.toGenericString() + " has unsupported parameter types", allowedTokenFilterArgs.containsAll(Arrays.asList(ctor.ParameterTypes)));
+			  tokenfilters.Add(castConstructor(typeof(TokenFilter), ctor));
+			}
+			else if (c.IsSubclassOf(typeof(CharFilter)))
+			{
+//JAVA TO C# CONVERTER TODO TASK: There is no .NET equivalent to the java.util.Collection 'containsAll' method:
+			  assertTrue(ctor.toGenericString() + " has unsupported parameter types", allowedCharFilterArgs.containsAll(Arrays.asList(ctor.ParameterTypes)));
+			  charfilters.Add(castConstructor(typeof(CharFilter), ctor));
+			}
+			else
+			{
+			  fail("Cannot get here");
+			}
+		  }
+		}
+
+//ORIGINAL LINE: final java.util.Comparator<Constructor<?>> ctorComp = new java.util.Comparator<Constructor<?>>()
+//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
+		IComparer<Constructor<?>> ctorComp = new ComparatorAnonymousInnerClassHelper();
+		tokenizers.Sort(ctorComp);
+		tokenfilters.Sort(ctorComp);
+		charfilters.Sort(ctorComp);
+		if (VERBOSE)
+		{
+		  // Concatenating a .NET list only prints its type name; join the elements to
+		  // get the element listing that Java's List.toString() produced.
+		  Console.WriteLine("tokenizers = [" + string.Join(", ", tokenizers) + "]");
+		  Console.WriteLine("tokenfilters = [" + string.Join(", ", tokenfilters) + "]");
+		  Console.WriteLine("charfilters = [" + string.Join(", ", charfilters) + "]");
+		}
+	  }
+
+	  /// <summary>
+	  /// Orders constructors by a textual signature (declaring type followed by the
+	  /// constructor's own string form), compared ordinally. Implements
+	  /// IComparer&lt;ConstructorInfo&gt;.Compare so the instance can be passed to a sort.
+	  /// </summary>
+	  private class ComparatorAnonymousInnerClassHelper : IComparer<System.Reflection.ConstructorInfo>
+	  {
+		  public ComparatorAnonymousInnerClassHelper()
+		  {
+		  }
+
+		  /// <returns>Negative, zero or positive as arg0's signature sorts before, equal to or after arg1's.</returns>
+		  public virtual int Compare(System.Reflection.ConstructorInfo arg0, System.Reflection.ConstructorInfo arg1)
+		  {
+			// Ordinal comparison of UTF-16 code units, like Java's String.compareTo.
+			return string.CompareOrdinal(describe(arg0), describe(arg1));
+		  }
+
+		  // ConstructorInfo.ToString() omits the declaring type, so prefix it to keep
+		  // constructors of different classes distinguishable.
+		  private static string describe(System.Reflection.ConstructorInfo ctor)
+		  {
+			return ctor.DeclaringType + " " + ctor;
+		  }
+	  }
+
+	  // Drops the references to the three constructor lists that beforeClass() assigns.
+//ORIGINAL LINE: @AfterClass public static void afterClass()
+	  public static void afterClass()
+	  {
+		tokenizers = tokenfilters = charfilters = null;
+	  }
+
+	  /// <summary>
+	  /// Casts <paramref name="ctor"/> to <c>Constructor&lt;T&gt;</c>. Carried over from the Java source, where it worked around
+	  /// <c>Class&lt;T&gt;#getConstructors()</c> returning an array instead of an unmodifiable <c>List&lt;Constructor&lt;T&gt;&gt;</c>.
+	  /// </summary>
+//NOTE(review): Type<T> and Constructor<T> are not .NET types (System.Type is not generic) - presumably this helper becomes unnecessary once the file uses .NET reflection types; confirm.
+//ORIGINAL LINE: @SuppressWarnings("unchecked") private static <T> Constructor<T> castConstructor(Class<T> instanceClazz, Constructor<?> ctor)
+	  private static Constructor<T> castConstructor<T, T1>(Type<T> instanceClazz, Constructor<T1> ctor)
+	  {
+		return (Constructor<T>) ctor; // instanceClazz is not used in the body
+	  }
+
+	  /// <summary>
+	  /// Gathers the classes under <paramref name="pckgname"/> through
+	  /// collectClassesForPackage and asserts (assertFalse) that the result is not empty.
+	  /// </summary>
+	  /// <returns>The list that collectClassesForPackage filled.</returns>
+//ORIGINAL LINE: public static java.util.List<Class> getClassesForPackage(String pckgname) throws Exception
+	  public static IList<Type> getClassesForPackage(string pckgname)
+	  {
+		var found = new List<Type>();
+		collectClassesForPackage(pckgname, found);
+
+		bool nothingFound = found.Count == 0;
+		assertFalse("No classes found in package '" + pckgname + "'; maybe your test classes are packaged as JAR file?", nothingFound);
+		return found;
+	  }
+
+	  /// <summary>
+	  /// Walks the "file" resources that the class loader reports for <paramref name="pckgname"/>,
+	  /// recursing into sub-directories as sub-packages, and adds every ".class" entry whose
+	  /// name neither starts nor ends with "Test" to <paramref name="classes"/>.
+	  /// </summary>
+	  // NOTE(review): ClassLoader, URL, URI and File are Java types with no declaration visible
+	  // in this part of the file; the body needs a .NET port (presumably assembly-based type
+	  // enumeration) before it can compile - confirm the intended replacement.
+//ORIGINAL LINE: private static void collectClassesForPackage(String pckgname, java.util.List<Class> classes) throws Exception
+	  private static void collectClassesForPackage(string pckgname, IList<Type> classes)
+	  {
+//ORIGINAL LINE: final ClassLoader cld = TestRandomChains.class.getClassLoader();
+		ClassLoader cld = typeof(TestRandomChains).ClassLoader;
+//ORIGINAL LINE: final String path = pckgname.replace('.', '/');
+		string path = pckgname.Replace('.', '/');
+//ORIGINAL LINE: final java.util.Iterator<java.net.URL> resources = cld.getResources(path);
+		IEnumerator<URL> resources = cld.getResources(path);
+		while (resources.MoveNext())
+		{
+//ORIGINAL LINE: final java.net.URI uri = resources.Current.toURI();
+		  URI uri = resources.Current.toURI();
+		  // A URI scheme is a machine identifier, so compare it ordinally: a
+		  // culture-sensitive ignore-case match can fail for "FILE" vs "file" under
+		  // cultures with special casing rules for 'i' (e.g. Turkish).
+		  if (!"file".Equals(uri.Scheme, StringComparison.OrdinalIgnoreCase))
+		  {
+			continue;
+		  }
+//ORIGINAL LINE: final java.io.File directory = new java.io.File(uri);
+		  File directory = new File(uri);
+		  if (directory.exists())
+		  {
+			string[] files = directory.list();
+			foreach (string file in files)
+			{
+			  if ((new File(directory, file)).Directory)
+			  {
+				// recurse
+				string subPackage = pckgname + "." + file;
+				collectClassesForPackage(subPackage, classes);
+			  }
+			  if (file.EndsWith(".class", StringComparison.Ordinal))
+			  {
+				// strip the ".class" suffix (6 characters)
+				string clazzName = file.Substring(0, file.Length - 6);
+				// exclude Test classes that happen to be in these packages.
+				// class.ForName'ing some of them can cause trouble.
+				if (!clazzName.EndsWith("Test", StringComparison.Ordinal) && !clazzName.StartsWith("Test", StringComparison.Ordinal))
+				{
+				  // Don't run static initializers, as we won't use most of them.
+				  // Java will do that automatically once accessed/instantiated.
+				  // NOTE(review): Type.GetType's second argument is throwOnError, so with
+				  // false a failed lookup yields null, which would be added to the list; the
+				  // comment above suggests the Java flag meant "do not initialize" - confirm.
+				  classes.Add(Type.GetType(pckgname + '.' + clazzName, false, cld));
+				}
+			  }
+			}
+		  }
+		}
+	  }
+
+	  private interface ArgProducer // Factory for one argument value; instances are registered per Type in the argProducers map.
+	  {
+		object create(Random random); // Returns an argument value; random is the only input the producer receives.
+	  }
+
+	  private static readonly IDictionary<Type, ArgProducer> argProducers = new IdentityHashMapAnonymousInnerClassHelper();
+
+	  private class IdentityHashMapAnonymousInnerClassHelper : IdentityHashMap<Type, ArgProducer>
+	  {
+		  public IdentityHashMapAnonymousInnerClassHelper()
+		  {
+		  }
+
+	//	  {
+	//	put(int.class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		// TODO: could cause huge ram usage to use full int range for some filters
+	//		// (e.g. allocate enormous arrays)
+	//		// return Integer.valueOf(random.nextInt());
+	//		return Integer.valueOf(TestUtil.nextInt(random, -100, 100));
+	//	  }
+	//	}
+	//   );
+	//	put(char.class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		// TODO: fix any filters that care to throw IAE instead.
+	//		// also add a unicode validating filter to validate termAtt?
+	//		// return Character.valueOf((char)random.nextInt(65536));
+	//		while(true)
+	//		{
+	//		  char c = (char)random.nextInt(65536);
+	//		  if (c < '\uD800' || c > '\uDFFF')
+	//		  {
+	//			return Character.valueOf(c);
+	//		  }
+	//		}
+	//	  }
+	//	}
+	//   );
+	//	put(float.class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		return Float.valueOf(random.nextFloat());
+	//	  }
+	//	}
+	//   );
+	//	put(boolean.class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		return Boolean.valueOf(random.nextBoolean());
+	//	  }
+	//	}
+	//   );
+	//	put(byte.class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		// this wraps to negative when casting to byte
+	//		return Byte.valueOf((byte) random.nextInt(256));
+	//	  }
+	//	}
+	//   );
+	//	put(byte[].class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		byte bytes[] = new byte[random.nextInt(256)];
+	//		random.nextBytes(bytes);
+	//		return bytes;
+	//	  }
+	//	}
+	//   );
+	//	put(Random.class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		return new Random(random.nextLong());
+	//	  }
+	//	}
+	//   );
+	//	put(Version.class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		// we expect bugs in emulating old versions
+	//		return TEST_VERSION_CURRENT;
+	//	  }
+	//	}
+	//   );
+	//	put(Set.class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		// TypeTokenFilter
+	//		Set<String> set = new HashSet<>();
+	//		int num = random.nextInt(5);
+	//		for (int i = 0; i < num; i++)
+	//		{
+	//		  set.add(StandardTokenizer.TOKEN_TYPES[random.nextInt(StandardTokenizer.TOKEN_TYPES.length)]);
+	//		}
+	//		return set;
+	//	  }
+	//	}
+	//   );
+	//	put(Collection.class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		// CapitalizationFilter
+	//		Collection<char[]> col = new ArrayList<>();
+	//		int num = random.nextInt(5);
+	//		for (int i = 0; i < num; i++)
+	//		{
+	//		  col.add(TestUtil.randomSimpleString(random).toCharArray());
+	//		}
+	//		return col;
+	//	  }
+	//	}
+	//   );
+	//	put(CharArraySet.class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		int num = random.nextInt(10);
+	//		CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, num, random.nextBoolean());
+	//		for (int i = 0; i < num; i++)
+	//		{
+	//		  // TODO: make nastier
+	//		  set.add(TestUtil.randomSimpleString(random));
+	//		}
+	//		return set;
+	//	  }
+	//	}
+	//   );
+	//	put(Pattern.class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		// TODO: don't want to make the exponentially slow ones Dawid documents
+	//		// in TestPatternReplaceFilter, so dont use truly random patterns (for now)
+	//		return Pattern.compile("a");
+	//	  }
+	//	}
+	//   );
+	//
+	//	put(Pattern[].class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		return new Pattern[] {Pattern.compile("([a-z]+)"), Pattern.compile("([0-9]+)")};
+	//	  }
+	//	}
+	//   );
+	//	put(PayloadEncoder.class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		return new IdentityEncoder(); // the other encoders will throw exceptions if tokens arent numbers?
+	//	  }
+	//	}
+	//   );
+	//	put(Dictionary.class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		// TODO: make nastier
+	//		InputStream affixStream = TestHunspellStemFilter.class.getResourceAsStream("simple.aff");
+	//		InputStream dictStream = TestHunspellStemFilter.class.getResourceAsStream("simple.dic");
+	//		try
+	//		{
+	//		 return new Dictionary(affixStream, dictStream);
+	//		}
+	//		catch (Exception ex)
+	//		{
+	//		  Rethrow.rethrow(ex);
+	//		  return null; // unreachable code
+	//		}
+	//	  }
+	//	}
+	//   );
+	//	put(Lucene43EdgeNGramTokenizer.Side.class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		return random.nextBoolean() ? Lucene43EdgeNGramTokenizer.Side.FRONT : Lucene43EdgeNGramTokenizer.Side.BACK;
+	//	  }
+	//	}
+	//   );
+	//	put(EdgeNGramTokenFilter.Side.class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		return random.nextBoolean() ? EdgeNGramTokenFilter.Side.FRONT : EdgeNGramTokenFilter.Side.BACK;
+	//	  }
+	//	}
+	//   );
+	//	put(HyphenationTree.class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		// TODO: make nastier
+	//		try
+	//		{
+	//		  InputSource @is = new InputSource(TestCompoundWordTokenFilter.class.getResource("da_UTF8.xml").toExternalForm());
+	//		  HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is);
+	//		  return hyphenator;
+	//		}
+	//		catch (Exception ex)
+	//		{
+	//		  Rethrow.rethrow(ex);
+	//		  return null; // unreachable code
+	//		}
+	//	  }
+	//	}
+	//   );
+	//	put(SnowballProgram.class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		try
+	//		{
+	//		  String lang = TestSnowball.SNOWBALL_LANGS[random.nextInt(TestSnowball.SNOWBALL_LANGS.length)];
+	//		  Class<? extends SnowballProgram> clazz = Class.forName("org.tartarus.snowball.ext." + lang + "Stemmer").asSubclass(SnowballProgram.class);
+	//		  return clazz.newInstance();
+	//		}
+	//		catch (Exception ex)
+	//		{
+	//		  Rethrow.rethrow(ex);
+	//		  return null; // unreachable code
+	//		}
+	//	  }
+	//	}
+	//   );
+	//	put(String.class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		// TODO: make nastier
+	//		if (random.nextBoolean())
+	//		{
+	//		  // a token type
+	//		  return StandardTokenizer.TOKEN_TYPES[random.nextInt(StandardTokenizer.TOKEN_TYPES.length)];
+	//		}
+	//		else
+	//		{
+	//		  return TestUtil.randomSimpleString(random);
+	//		}
+	//	  }
+	//	}
+	//   );
+	//	put(NormalizeCharMap.class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
+	//		// we can't add duplicate keys, or NormalizeCharMap gets angry
+	//		Set<String> keys = new HashSet<>();
+	//		int num = random.nextInt(5);
+	//		//System.out.println("NormalizeCharMap=");
+	//		for (int i = 0; i < num; i++)
+	//		{
+	//		  String key = TestUtil.randomSimpleString(random);
+	//		  if (!keys.contains(key) && key.length() > 0)
+	//		  {
+	//			String value = TestUtil.randomSimpleString(random);
+	//			builder.add(key, value);
+	//			keys.add(key);
+	//			//System.out.println("mapping: '" + key + "' => '" + value + "'");
+	//		  }
+	//		}
+	//		return builder.build();
+	//	  }
+	//	}
+	//   );
+	//	put(CharacterRunAutomaton.class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		// TODO: could probably use a purely random automaton
+	//		switch(random.nextInt(5))
+	//		{
+	//		  case 0:
+	//			  return MockTokenizer.KEYWORD;
+	//		  case 1:
+	//			  return MockTokenizer.SIMPLE;
+	//		  case 2:
+	//			  return MockTokenizer.WHITESPACE;
+	//		  case 3:
+	//			  return MockTokenFilter.EMPTY_STOPSET;
+	//		  default:
+	//			  return MockTokenFilter.ENGLISH_STOPSET;
+	//		}
+	//	  }
+	//	}
+	//   );
+	//	put(CharArrayMap.class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		int num = random.nextInt(10);
+	//		CharArrayMap<String> map = new CharArrayMap<>(TEST_VERSION_CURRENT, num, random.nextBoolean());
+	//		for (int i = 0; i < num; i++)
+	//		{
+	//		  // TODO: make nastier
+	//		  map.put(TestUtil.randomSimpleString(random), TestUtil.randomSimpleString(random));
+	//		}
+	//		return map;
+	//	  }
+	//	}
+	//   );
+	//	put(StemmerOverrideMap.class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		int num = random.nextInt(10);
+	//		StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random.nextBoolean());
+	//		for (int i = 0; i < num; i++)
+	//		{
+	//		  String input = "";
+	//		  do
+	//		  {
+	//			input = TestUtil.randomRealisticUnicodeString(random);
+	//		  } while(input.isEmpty());
+	//		  String @out = "";
+	//		  TestUtil.randomSimpleString(random);
+	//		  do
+	//		  {
+	//			@out = TestUtil.randomRealisticUnicodeString(random);
+	//		  } while(@out.isEmpty());
+	//		  builder.add(input, @out);
+	//		}
+	//		try
+	//		{
+	//		  return builder.build();
+	//		}
+	//		catch (Exception ex)
+	//		{
+	//		  Rethrow.rethrow(ex);
+	//		  return null; // unreachable code
+	//		}
+	//	  }
+	//	}
+	//   );
+	//	put(SynonymMap.class, new ArgProducer()
+	//	{
+	//	  @@Override public Object create(Random random)
+	//	  {
+	//		SynonymMap.Builder b = new SynonymMap.Builder(random.nextBoolean());
+	//		final int numEntries = atLeast(10);
+	//		for (int j = 0; j < numEntries; j++)
+	//		{
+	//		  addSyn(b, randomNonEmptyString(random), randomNonEmptyString(random), random.nextBoolean());
+	//		}
+	//		try
+	//		{
+	//		  return b.build();
+	//		}
+	//		catch (Exception ex)
+	//		{
+	//		  Rethrow.rethrow(ex);
+	//		  return null; // unreachable code
+	//		}
+	//	  }
+	//
+	//	  private void addSyn(SynonymMap.Builder b, String input, String output, boolean keepOrig)
+	//	  {
+	//		b.add(new CharsRef(input.replaceAll(" +", "\u0000")), new CharsRef(output.replaceAll(" +", "\u0000")), keepOrig);
+	//	  }
+	//
+	//	  private String randomNonEmptyString(Random random)
+	//	  {
+	//		while(true)
+	//		{
+	//		  final String s = TestUtil.randomUnicodeString(random).trim();
+	//		  if (s.length() != 0 && s.indexOf('\u0000') == -1)
+	//		  {
+	//			return s;
+	//		  }
+	//		}
+	//	  }
+	//	}
+	//   );
+	//  }
+	//  }
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//
+	//  static final Set<Class> allowedTokenizerArgs, allowedTokenFilterArgs, allowedCharFilterArgs;
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//ignore
+	//
+	//  @@SuppressWarnings("unchecked") static <T> T newRandomArg(Random random, Class<T> paramType)
+	//  {
+	//	final ArgProducer producer = argProducers.get(paramType);
+	//	assertNotNull("No producer for arguments of type " + paramType.getName() + " found", producer);
+	//	return (T) producer.create(random);
+	//  }
+	//
+	//  static Object[] newTokenizerArgs(Random random, Reader reader, Class[] paramTypes)
+	//  {
+	//	Object[] args = new Object[paramTypes.length];
+	//	for (int i = 0; i < args.length; i++)
+	//	{
+	//	  Class paramType = paramTypes[i];
+	//	  if (paramType == Reader.class)
+	//	  {
+	//		args[i] = reader;
+	//	  }
+	//	  else if (paramType == AttributeFactory.class)
+	//	  {
+	//		// TODO: maybe the collator one...???
+	//		args[i] = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
+	//	  }
+	//	  else if (paramType == AttributeSource.class)
+	//	  {
+	//		// TODO: args[i] = new AttributeSource();
+	//		// this is currently too scary to deal with!
+	//		args[i] = null; // force IAE
+	//	  }
+	//	  else
+	//	  {
+	//		args[i] = newRandomArg(random, paramType);
+	//	  }
+	//	}
+	//	return args;
+	//  }
+	//
+	//  static Object[] newCharFilterArgs(Random random, Reader reader, Class[] paramTypes)
+	//  {
+	//	Object[] args = new Object[paramTypes.length];
+	//	for (int i = 0; i < args.length; i++)
+	//	{
+	//	  Class paramType = paramTypes[i];
+	//	  if (paramType == Reader.class)
+	//	  {
+	//		args[i] = reader;
+	//	  }
+	//	  else
+	//	  {
+	//		args[i] = newRandomArg(random, paramType);
+	//	  }
+	//	}
+	//	return args;
+	//  }
+	//
+	//  static Object[] newFilterArgs(Random random, TokenStream stream, Class[] paramTypes)
+	//  {
+	//	Object[] args = new Object[paramTypes.length];
+	//	for (int i = 0; i < args.length; i++)
+	//	{
+	//	  Class paramType = paramTypes[i];
+	//	  if (paramType == TokenStream.class)
+	//	  {
+	//		args[i] = stream;
+	//	  }
+	//	  else if (paramType == CommonGramsFilter.class)
+	//	  {
+	//		// TODO: fix this one, thats broken: CommonGramsQueryFilter takes this one explicitly
+	//		args[i] = new CommonGramsFilter(TEST_VERSION_CURRENT, stream, newRandomArg(random, CharArraySet.class));
+	//	  }
+	//	  else
+	//	  {
+	//		args[i] = newRandomArg(random, paramType);
+	//	  }
+	//	}
+	//	return args;
+	//  }
+	//
+	//  static class MockRandomAnalyzer extends Analyzer
+	//  {
+	//	final long seed;
+	//
+	//	MockRandomAnalyzer(long seed)
+	//	{
+	//	  this.seed = seed;
+	//	}
+	//
+	//	public boolean offsetsAreCorrect()
+	//	{
+	//	  // TODO: can we not do the full chain here!?
+	//	  Random random = new Random(seed);
+	//	  TokenizerSpec tokenizerSpec = newTokenizer(random, new StringReader(""));
+	//	  TokenFilterSpec filterSpec = newFilterChain(random, tokenizerSpec.tokenizer, tokenizerSpec.offsetsAreCorrect);
+	//	  return filterSpec.offsetsAreCorrect;
+	//	}
+	//
+	//	@@Override protected TokenStreamComponents createComponents(String fieldName, Reader reader)
+	//	{
+	//	  Random random = new Random(seed);
+	//	  TokenizerSpec tokenizerSpec = newTokenizer(random, reader);
+	//	  //System.out.println("seed=" + seed + ",create tokenizer=" + tokenizerSpec.toString);
+	//	  TokenFilterSpec filterSpec = newFilterChain(random, tokenizerSpec.tokenizer, tokenizerSpec.offsetsAreCorrect);
+	//	  //System.out.println("seed=" + seed + ",create filter=" + filterSpec.toString);
+	//	  return new TokenStreamComponents(tokenizerSpec.tokenizer, filterSpec.stream);
+	//	}
+	//
+	//	@@Override protected Reader initReader(String fieldName, Reader reader)
+	//	{
+	//	  Random random = new Random(seed);
+	//	  CharFilterSpec charfilterspec = newCharFilterChain(random, reader);
+	//	  return charfilterspec.reader;
+	//	}
+	//
+	//	@@Override public String toString()
+	//	{
+	//	  Random random = new Random(seed);
+	//	  StringBuilder sb = new StringBuilder();
+	//	  CharFilterSpec charFilterSpec = newCharFilterChain(random, new StringReader(""));
+	//	  sb.append("\ncharfilters=");
+	//	  sb.append(charFilterSpec.toString);
+	//	  // intentional: initReader gets its own separate random
+	//	  random = new Random(seed);
+	//	  TokenizerSpec tokenizerSpec = newTokenizer(random, charFilterSpec.reader);
+	//	  sb.append("\n");
+	//	  sb.append("tokenizer=");
+	//	  sb.append(tokenizerSpec.toString);
+	//	  TokenFilterSpec tokenFilterSpec = newFilterChain(random, tokenizerSpec.tokenizer, tokenizerSpec.offsetsAreCorrect);
+	//	  sb.append("\n");
+	//	  sb.append("filters=");
+	//	  sb.append(tokenFilterSpec.toString);
+	//	  sb.append("\n");
+	//	  sb.append("offsetsAreCorrect=" + tokenFilterSpec.offsetsAreCorrect);
+	//	  return sb.toString();
+	//	}
+	//
+	//	private <T> T createComponent(Constructor<T> ctor, Object[] args, StringBuilder descr)
+	//	{
+	//	  try
+	//	  {
+	//		final T instance = ctor.newInstance(args);
+	// /*
+	// if (descr.length() > 0) {
+	//   descr.append(",");
+	// }
+	// */
+	//		descr.append("\n  ");
+	//		descr.append(ctor.getDeclaringClass().getName());
+	//		String @params = Arrays.deepToString(args);
+	//		@params = @params.substring(1, (@params.length()-1) - 1);
+	//		descr.append("(").append(@params).append(")");
+	//		return instance;
+	//	  }
+	//	  catch (InvocationTargetException ite)
+	//	  {
+	//		final Throwable cause = ite.getCause();
+	//		if (cause instanceof IllegalArgumentException || cause instanceof UnsupportedOperationException)
+	//	{
+	//		  // thats ok, ignore
+	//		  if (VERBOSE)
+	//		  {
+	//			System.err.println("Ignoring IAE/UOE from ctor:");
+	//			cause.printStackTrace(System.err);
+	//		  }
+	//		}
+	//		else
+	//		{
+	//		  Rethrow.rethrow(cause);
+	//		}
+	//	  }
+	//	  catch (IllegalAccessException iae)
+	//	  {
+	//		Rethrow.rethrow(iae);
+	//	  }
+	//	  catch (InstantiationException ie)
+	//	  {
+	//		Rethrow.rethrow(ie);
+	//	  }
+	//	  return null; // no success
+	//	}
+	//
+	//	private boolean broken(Constructor<?> ctor, Object[] args)
+	//	{
+	//	  final Predicate<Object[]> pred = brokenConstructors.get(ctor);
+	//	  return pred != null && pred.apply(args);
+	//	}
+	//
+	//	private boolean brokenOffsets(Constructor<?> ctor, Object[] args)
+	//	{
+	//	  final Predicate<Object[]> pred = brokenOffsetsConstructors.get(ctor);
+	//	  return pred != null && pred.apply(args);
+	//	}
+	//
+	//	// create a new random tokenizer from classpath
+	//	private TokenizerSpec newTokenizer(Random random, Reader reader)
+	//	{
+	//	  TokenizerSpec spec = new TokenizerSpec();
+	//	  while (spec.tokenizer == null)
+	//	  {
+	//		final Constructor<? extends Tokenizer> ctor = tokenizers.get(random.nextInt(tokenizers.size()));
+	//		final StringBuilder descr = new StringBuilder();
+	//		final CheckThatYouDidntReadAnythingReaderWrapper wrapper = new CheckThatYouDidntReadAnythingReaderWrapper(reader);
+	//		final Object args[] = newTokenizerArgs(random, wrapper, ctor.getParameterTypes());
+	//		if (broken(ctor, args))
+	//		{
+	//		  continue;
+	//		}
+	//		spec.tokenizer = createComponent(ctor, args, descr);
+	//		if (spec.tokenizer != null)
+	//		{
+	//		  spec.offsetsAreCorrect &= !brokenOffsets(ctor, args);
+	//		  spec.toString = descr.toString();
+	//		}
+	//		else
+	//		{
+	//		  assertFalse(ctor.getDeclaringClass().getName() + " has read something in ctor but failed with UOE/IAE", wrapper.readSomething);
+	//		}
+	//	  }
+	//	  return spec;
+	//	}
+	//
+	//	private CharFilterSpec newCharFilterChain(Random random, Reader reader)
+	//	{
+	//	  CharFilterSpec spec = new CharFilterSpec();
+	//	  spec.reader = reader;
+	//	  StringBuilder descr = new StringBuilder();
+	//	  int numFilters = random.nextInt(3);
+	//	  for (int i = 0; i < numFilters; i++)
+	//	  {
+	//		while (true)
+	//		{
+	//		  final Constructor<? extends CharFilter> ctor = charfilters.get(random.nextInt(charfilters.size()));
+	//		  final Object args[] = newCharFilterArgs(random, spec.reader, ctor.getParameterTypes());
+	//		  if (broken(ctor, args))
+	//		  {
+	//			continue;
+	//		  }
+	//		  reader = createComponent(ctor, args, descr);
+	//		  if (reader != null)
+	//		  {
+	//			spec.reader = reader;
+	//			break;
+	//		  }
+	//		}
+	//	  }
+	//	  spec.toString = descr.toString();
+	//	  return spec;
+	//	}
+	//
+	//	private TokenFilterSpec newFilterChain(Random random, Tokenizer tokenizer, boolean offsetsAreCorrect)
+	//	{
+	//	  TokenFilterSpec spec = new TokenFilterSpec();
+	//	  spec.offsetsAreCorrect = offsetsAreCorrect;
+	//	  spec.stream = tokenizer;
+	//	  StringBuilder descr = new StringBuilder();
+	//	  int numFilters = random.nextInt(5);
+	//	  for (int i = 0; i < numFilters; i++)
+	//	  {
+	//
+	//		// Insert ValidatingTF after each stage so we can
+	//		// catch problems right after the TF that "caused"
+	//		// them:
+	//		spec.stream = new ValidatingTokenFilter(spec.stream, "stage " + i, spec.offsetsAreCorrect);
+	//
+	//		while (true)
+	//		{
+	//		  final Constructor<? extends TokenFilter> ctor = tokenfilters.get(random.nextInt(tokenfilters.size()));
+	//
+	//		  // hack: MockGraph/MockLookahead has assertions that will trip if they follow
+	//		  // an offsets violator. so we cant use them after e.g. wikipediatokenizer
+	//		  if (!spec.offsetsAreCorrect && (ctor.getDeclaringClass().equals(MockGraphTokenFilter.class) || ctor.getDeclaringClass().equals(MockRandomLookaheadTokenFilter.class)))
+	//		  {
+	//			continue;
+	//		  }
+	//
+	//		  final Object args[] = newFilterArgs(random, spec.stream, ctor.getParameterTypes());
+	//		  if (broken(ctor, args))
+	//		  {
+	//			continue;
+	//		  }
+	//		  final TokenFilter flt = createComponent(ctor, args, descr);
+	//		  if (flt != null)
+	//		  {
+	//			spec.offsetsAreCorrect &= !brokenOffsets(ctor, args);
+	//			spec.stream = flt;
+	//			break;
+	//		  }
+	//		}
+	//	  }
+	//
+	//	  // Insert ValidatingTF after each stage so we can
+	//	  // catch problems right after the TF that "caused"
+	//	  // them:
+	//	  spec.stream = new ValidatingTokenFilter(spec.stream, "last stage", spec.offsetsAreCorrect);
+	//
+	//	  spec.toString = descr.toString();
+	//	  return spec;
+	//	}
+	//  }
+	//
+	//  static class CheckThatYouDidntReadAnythingReaderWrapper extends CharFilter
+	//  {
+	//	boolean readSomething;
+	//
+	//	CheckThatYouDidntReadAnythingReaderWrapper(Reader @in)
+	//	{
+	//	  base(@in);
+	//	}
+	//
+	//	@@Override public int correct(int currentOff)
+	//	{
+	//	  return currentOff; // we don't change any offsets
+	//	}
+	//
+	//	@@Override public int read(char[] cbuf, int off, int len) throws IOException
+	//	{
+	//	  readSomething = true;
+	//	  return input.read(cbuf, off, len);
+	//	}
+	//
+	//	@@Override public int read() throws IOException
+	//	{
+	//	  readSomething = true;
+	//	  return input.read();
+	//	}
+	//
+	//	@@Override public int read(CharBuffer target) throws IOException
+	//	{
+	//	  readSomething = true;
+	//	  return input.read(target);
+	//	}
+	//
+	//	@@Override public int read(char[] cbuf) throws IOException
+	//	{
+	//	  readSomething = true;
+	//	  return input.read(cbuf);
+	//	}
+	//
+	//	@@Override public long skip(long n) throws IOException
+	//	{
+	//	  readSomething = true;
+	//	  return input.skip(n);
+	//	}
+	//
+	//	@@Override public void mark(int readAheadLimit) throws IOException
+	//	{
+	//	  input.mark(readAheadLimit);
+	//	}
+	//
+	//	@@Override public boolean markSupported()
+	//	{
+	//	  return input.markSupported();
+	//	}
+	//
+	//	@@Override public boolean ready() throws IOException
+	//	{
+	//	  return input.ready();
+	//	}
+	//
+	//	@@Override public void reset() throws IOException
+	//	{
+	//	  input.reset();
+	//	}
+	//  }
+	//
+	//  static class TokenizerSpec
+	//  {
+	//	Tokenizer tokenizer;
+	//	String toString;
+	//	boolean offsetsAreCorrect = true;
+	//  }
+	//
+	//  static class TokenFilterSpec
+	//  {
+	//	TokenStream stream;
+	//	String toString;
+	//	boolean offsetsAreCorrect = true;
+	//  }
+	//
+	//  static class CharFilterSpec
+	//  {
+	//	Reader reader;
+	//	String toString;
+	//  }
+	//
+	//  public void testRandomChains() throws Throwable
+	//  {
+	//	int numIterations = atLeast(20);
+	//	Random random = random();
+	//	for (int i = 0; i < numIterations; i++)
+	//	{
+	//	  MockRandomAnalyzer a = new MockRandomAnalyzer(random.nextLong());
+	//	  if (VERBOSE)
+	//	  {
+	//		System.out.println("Creating random analyzer:" + a);
+	//	  }
+	//	  try
+	//	  {
+	//		checkRandomData(random, a, 500*RANDOM_MULTIPLIER, 20, false, false); // We already validate our own offsets...
+	//	  }
+	//	  catch (Throwable e)
+	//	  {
+	//		System.err.println("Exception from random analyzer: " + a);
+	//		throw e;
+	//	  }
+	//	}
+	//  }
+	//
+	//  // we might regret this decision...
+	//  public void testRandomChainsWithLargeStrings() throws Throwable
+	//  {
+	//	int numIterations = atLeast(20);
+	//	Random random = random();
+	//	for (int i = 0; i < numIterations; i++)
+	//	{
+	//	  MockRandomAnalyzer a = new MockRandomAnalyzer(random.nextLong());
+	//	  if (VERBOSE)
+	//	  {
+	//		System.out.println("Creating random analyzer:" + a);
+	//	  }
+	//	  try
+	//	  {
+	//		checkRandomData(random, a, 50*RANDOM_MULTIPLIER, 128, false, false); // We already validate our own offsets...
+	//	  }
+	//	  catch (Throwable e)
+	//	  {
+	//		System.err.println("Exception from random analyzer: " + a);
+	//		throw e;
+	//	  }
+	//	}
+	//  }
+	//}
+
+	  }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStandardAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStandardAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStandardAnalyzer.cs
new file mode 100644
index 0000000..4465288
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStandardAnalyzer.cs
@@ -0,0 +1,453 @@
+using System;
+using System.Text;
+
+namespace org.apache.lucene.analysis.core
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using StandardAnalyzer = org.apache.lucene.analysis.standard.StandardAnalyzer;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using Version = org.apache.lucene.util.Version;
+
+	public class TestStandardAnalyzer : BaseTokenStreamTestCase
+	{
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testHugeDoc() throws java.io.IOException
+	  /// <summary>
+	  /// Tokenizes an input made of 4094 spaces followed by "testing 1234" and asserts
+	  /// that the tokenizer emits exactly the tokens "testing" and "1234".
+	  /// </summary>
+	  public virtual void testHugeDoc()
+	  {
+		StringBuilder sb = new StringBuilder();
+		// StringBuilder.Append(char, int) writes the run of spaces directly; the Java-only
+		// Arrays.fill helper left behind by the converter has no counterpart in the .NET BCL.
+		sb.Append(' ', 4094);
+		sb.Append("testing 1234");
+		string input = sb.ToString();
+		StandardTokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+		BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new string[] {"testing", "1234"});
+	  }
+
+	  // Analyzer used by the tests in this class; see the helper class below for its components.
+	  private Analyzer a = new AnalyzerAnonymousInnerClassHelper();
+
+	  /// <summary>
+	  /// Analyzer whose components consist solely of a <see cref="StandardTokenizer"/>
+	  /// created over the supplied reader.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
+	  {
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			return new TokenStreamComponents(new StandardTokenizer(TEST_VERSION_CURRENT, reader));
+		  }
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testArmenian() throws Exception
+	  /// <summary>
+	  /// Asserts the token sequence that <c>a</c> produces for an Armenian sample sentence.
+	  /// </summary>
+	  public virtual void testArmenian()
+	  {
+		string sentence = "Վիքիպեդիայի 13 միլիոն հոդվածները (4,600` հայերեն վիքիպեդիայում) գրվել են կամավորների կողմից ու համարյա բոլոր հոդվածները կարող է խմբագրել ցանկաց մարդ ով կարող է բացել Վիքիպեդիայի կայքը։";
+		string[] expectedTokens = new string[] {"Վիքիպեդիայի", "13", "միլիոն", "հոդվածները", "4,600", "հայերեն", "վիքիպեդիայում", "գրվել", "են", "կամավորների", "կողմից", "ու", "համարյա", "բոլոր", "հոդվածները", "կարող", "է", "խմբագրել", "ցանկաց", "մարդ", "ով", "կարող", "է", "բացել", "Վիքիպեդիայի", "կայքը"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, sentence, expectedTokens);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testAmharic() throws Exception
+	  /// <summary>
+	  /// Asserts the token sequence that <c>a</c> produces for an Amharic sample sentence.
+	  /// </summary>
+	  public virtual void testAmharic()
+	  {
+		string sentence = "ዊኪፔድያ የባለ ብዙ ቋንቋ የተሟላ ትክክለኛና ነጻ መዝገበ ዕውቀት (ኢንሳይክሎፒዲያ) ነው። ማንኛውም";
+		string[] expectedTokens = new string[] {"ዊኪፔድያ", "የባለ", "ብዙ", "ቋንቋ", "የተሟላ", "ትክክለኛና", "ነጻ", "መዝገበ", "ዕውቀት", "ኢንሳይክሎፒዲያ", "ነው", "ማንኛውም"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, sentence, expectedTokens);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testArabic() throws Exception
+	  /// <summary>
+	  /// Asserts the token sequence that <c>a</c> produces for an Arabic sample sentence
+	  /// containing an embedded English title.
+	  /// </summary>
+	  public virtual void testArabic()
+	  {
+		string sentence = "الفيلم الوثائقي الأول عن ويكيبيديا يسمى \"الحقيقة بالأرقام: قصة ويكيبيديا\" (بالإنجليزية: Truth in Numbers: The Wikipedia Story)، سيتم إطلاقه في 2008.";
+		string[] expectedTokens = new string[] {"الفيلم", "الوثائقي", "الأول", "عن", "ويكيبيديا", "يسمى", "الحقيقة", "بالأرقام", "قصة", "ويكيبيديا", "بالإنجليزية", "Truth", "in", "Numbers", "The", "Wikipedia", "Story", "سيتم", "إطلاقه", "في", "2008"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, sentence, expectedTokens);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testAramaic() throws Exception
+	  /// <summary>
+	  /// Asserts the token sequence that <c>a</c> produces for an Aramaic sample sentence.
+	  /// </summary>
+	  public virtual void testAramaic()
+	  {
+		string sentence = "ܘܝܩܝܦܕܝܐ (ܐܢܓܠܝܐ: Wikipedia) ܗܘ ܐܝܢܣܩܠܘܦܕܝܐ ܚܐܪܬܐ ܕܐܢܛܪܢܛ ܒܠܫܢ̈ܐ ܣܓܝܐ̈ܐ܂ ܫܡܗ ܐܬܐ ܡܢ ܡ̈ܠܬܐ ܕ\"ܘܝܩܝ\" ܘ\"ܐܝܢܣܩܠܘܦܕܝܐ\"܀";
+		string[] expectedTokens = new string[] {"ܘܝܩܝܦܕܝܐ", "ܐܢܓܠܝܐ", "Wikipedia", "ܗܘ", "ܐܝܢܣܩܠܘܦܕܝܐ", "ܚܐܪܬܐ", "ܕܐܢܛܪܢܛ", "ܒܠܫܢ̈ܐ", "ܣܓܝܐ̈ܐ", "ܫܡܗ", "ܐܬܐ", "ܡܢ", "ܡ̈ܠܬܐ", "ܕ", "ܘܝܩܝ", "ܘ", "ܐܝܢܣܩܠܘܦܕܝܐ"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, sentence, expectedTokens);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBengali() throws Exception
+	  /// <summary>
+	  /// Asserts the token sequence that <c>a</c> produces for a Bengali sample passage.
+	  /// </summary>
+	  public virtual void testBengali()
+	  {
+		// The last expected token had been split across two lines (losing a vowel sign);
+		// it is restored here so that it matches the final word of the input sentence.
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "এই বিশ্বকোষ পরিচালনা করে উইকিমিডিয়া ফাউন্ডেশন (একটি অলাভজনক সংস্থা)। উইকিপিডিয়ার শুরু ১৫ জানুয়ারি, ২০০১ সালে। এখন পর্যন্ত ২০০টিরও বেশী ভাষায় উইকিপিডিয়া রয়েছে।", new string[] {"এই", "বিশ্বকোষ", "পরিচালনা", "করে", "উইকিমিডিয়া", "ফাউন্ডেশন", "একটি", "অলাভজনক", "সংস্থা", "উইকিপিডিয়ার", "শুরু", "১৫", "জানুয়ারি", "২০০১", "সালে", "এখন", "পর্যন্ত", "২০০টিরও", "বেশী", "ভাষায়", "উইকিপিডিয়া", "রয়েছে"});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testFarsi() throws Exception
+	  /// <summary>
+	  /// Asserts the token sequence that <c>a</c> produces for a Farsi sample sentence.
+	  /// </summary>
+	  public virtual void testFarsi()
+	  {
+		string sentence = "ویکی پدیای انگلیسی در تاریخ ۲۵ دی ۱۳۷۹ به صورت مکملی برای دانشنامهٔ تخصصی نوپدیا نوشته شد.";
+		string[] expectedTokens = new string[] {"ویکی", "پدیای", "انگلیسی", "در", "تاریخ", "۲۵", "دی", "۱۳۷۹", "به", "صورت", "مکملی", "برای", "دانشنامهٔ", "تخصصی", "نوپدیا", "نوشته", "شد"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, sentence, expectedTokens);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testGreek() throws Exception
+	  /// <summary>
+	  /// Asserts the token sequence that <c>a</c> produces for a Greek sample sentence.
+	  /// </summary>
+	  public virtual void testGreek()
+	  {
+		string sentence = "Γράφεται σε συνεργασία από εθελοντές με το λογισμικό wiki, κάτι που σημαίνει ότι άρθρα μπορεί να προστεθούν ή να αλλάξουν από τον καθένα.";
+		string[] expectedTokens = new string[] {"Γράφεται", "σε", "συνεργασία", "από", "εθελοντές", "με", "το", "λογισμικό", "wiki", "κάτι", "που", "σημαίνει", "ότι", "άρθρα", "μπορεί", "να", "προστεθούν", "ή", "να", "αλλάξουν", "από", "τον", "καθένα"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, sentence, expectedTokens);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testThai() throws Exception
+	  /// <summary>
+	  /// Asserts the token sequence that <c>a</c> produces for a Thai sample; each
+	  /// punctuation-delimited run is expected as a single token.
+	  /// </summary>
+	  public virtual void testThai()
+	  {
+		string sentence = "การที่ได้ต้องแสดงว่างานดี. แล้วเธอจะไปไหน? ๑๒๓๔";
+		string[] expectedTokens = new string[] {"การที่ได้ต้องแสดงว่างานดี", "แล้วเธอจะไปไหน", "๑๒๓๔"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, sentence, expectedTokens);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLao() throws Exception
+	  /// <summary>
+	  /// Asserts the token sequence that <c>a</c> produces for a Lao sample phrase.
+	  /// </summary>
+	  public virtual void testLao()
+	  {
+		string phrase = "ສາທາລະນະລັດ ປະຊາທິປະໄຕ ປະຊາຊົນລາວ";
+		string[] expectedTokens = new string[] {"ສາທາລະນະລັດ", "ປະຊາທິປະໄຕ", "ປະຊາຊົນລາວ"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, phrase, expectedTokens);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testTibetan() throws Exception
+	  /// <summary>
+	  /// Asserts the token sequence that <c>a</c> produces for a Tibetan sample sentence.
+	  /// </summary>
+	  public virtual void testTibetan()
+	  {
+		string sentence = "སྣོན་མཛོད་དང་ལས་འདིས་བོད་ཡིག་མི་ཉམས་གོང་འཕེལ་དུ་གཏོང་བར་ཧ་ཅང་དགེ་མཚན་མཆིས་སོ། །";
+		string[] expectedTokens = new string[] {"སྣོན", "མཛོད", "དང", "ལས", "འདིས", "བོད", "ཡིག", "མི", "ཉམས", "གོང", "འཕེལ", "དུ", "གཏོང", "བར", "ཧ", "ཅང", "དགེ", "མཚན", "མཆིས", "སོ"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, sentence, expectedTokens);
+	  }
+
+	  /*
+	   * For Chinese, tokenize each character separately (these can later form bigrams or whatever)
+	   */
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testChinese() throws Exception
+	  /// <summary>
+	  /// Asserts that <c>a</c> emits each Chinese character of the sample as its own token,
+	  /// followed by the number and the Latin word.
+	  /// </summary>
+	  public virtual void testChinese()
+	  {
+		string sentence = "我是中国人。 1234 Tests ";
+		string[] expectedTokens = new string[] {"我", "是", "中", "国", "人", "1234", "Tests"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, sentence, expectedTokens);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testEmpty() throws Exception
+	  /// <summary>
+	  /// Asserts that <c>a</c> produces no tokens for an empty string, a lone period
+	  /// and a lone space.
+	  /// </summary>
+	  public virtual void testEmpty()
+	  {
+		string[] tokenlessInputs = new string[] {"", ".", " "};
+		foreach (string tokenlessInput in tokenlessInputs)
+		{
+		  BaseTokenStreamTestCase.assertAnalyzesTo(a, tokenlessInput, new string[] {});
+		}
+	  }
+
+	  /* test various jira issues this analyzer is related to */
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLUCENE1545() throws Exception
+	  /// <summary>
+	  /// Regression check for LUCENE-1545: a word containing a combining character
+	  /// must come back as a single token.
+	  /// </summary>
+	  public virtual void testLUCENE1545()
+	  {
+		/*
+		 * Standard analyzer did not correctly tokenize the combining character U+0364 COMBINING LATIN SMALL LETTER E.
+		 * The word "moͤchte" was incorrectly tokenized into "mo" "chte", losing the combining character.
+		 * The expected result is exactly one token, "moͤchte".
+		 */
+		string word = "moͤchte";
+		string[] expectedTokens = new string[] {"moͤchte"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, word, expectedTokens);
+	  }
+
+	  /* Tests from StandardAnalyzer, just to show behavior is similar */
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testAlphanumericSA() throws Exception
+	  /// <summary>
+	  /// Asserts that mixed letter/digit inputs are each returned by <c>a</c> as one unchanged token.
+	  /// </summary>
+	  public virtual void testAlphanumericSA()
+	  {
+		// alphanumeric tokens: every input is expected back as a single identical token
+		string[] alphanumericTerms = new string[] {"B2B", "2B"};
+		foreach (string term in alphanumericTerms)
+		{
+		  BaseTokenStreamTestCase.assertAnalyzesTo(a, term, new string[]{term});
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testDelimitersSA() throws Exception
+	  /// <summary>
+	  /// Asserts that <c>a</c> splits tokens at the delimiters "-", "," and "/".
+	  /// </summary>
+	  public virtual void testDelimitersSA()
+	  {
+		// other delimiters: "-", "/", ","
+		string[] inputs = new string[] {"some-dashed-phrase", "dogs,chase,cats", "ac/dc"};
+		string[][] expectedTokens = new string[][]
+		{
+		  new string[]{"some", "dashed", "phrase"},
+		  new string[]{"dogs", "chase", "cats"},
+		  new string[]{"ac", "dc"}
+		};
+		for (int i = 0; i < inputs.Length; i++)
+		{
+		  BaseTokenStreamTestCase.assertAnalyzesTo(a, inputs[i], expectedTokens[i]);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testApostrophesSA() throws Exception
+	  /// <summary>
+	  /// Asserts that words with internal apostrophes are each returned by <c>a</c>
+	  /// as one unchanged token.
+	  /// </summary>
+	  public virtual void testApostrophesSA()
+	  {
+		// internal apostrophes: O'Reilly, you're, O'Reilly's
+		string[] apostropheTerms = new string[] {"O'Reilly", "you're", "she's", "Jim's", "don't", "O'Reilly's"};
+		foreach (string term in apostropheTerms)
+		{
+		  BaseTokenStreamTestCase.assertAnalyzesTo(a, term, new string[]{term});
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testNumericSA() throws Exception
+	  /// <summary>
+	  /// Asserts how <c>a</c> tokenizes numeric-looking input: a decimal number and a dotted
+	  /// IP address each stay a single token, and "R2D2 C3PO" splits only at the space.
+	  /// </summary>
+	  public virtual void testNumericSA()
+	  {
+		// floating point, serial, model numbers, ip addresses, etc.
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "21.35", new string[]{"21.35"});
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "R2D2 C3PO", new string[]{"R2D2", "C3PO"});
+		// The IP-address case is asserted once; a verbatim duplicate of this line was removed.
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "216.239.63.104", new string[]{"216.239.63.104"});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testTextWithNumbersSA() throws Exception
+	  /// <summary>
+	  /// Asserts that a number embedded in an English sentence is returned by <c>a</c>
+	  /// as its own token alongside the surrounding words.
+	  /// </summary>
+	  public virtual void testTextWithNumbersSA()
+	  {
+		// numbers
+		string sentence = "David has 5000 bones";
+		string[] expectedTokens = new string[]{"David", "has", "5000", "bones"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, sentence, expectedTokens);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testVariousTextSA() throws Exception
+	  /// <summary>
+	  /// Asserts the tokens <c>a</c> produces for assorted English inputs: plain words,
+	  /// mixed case, repeated whitespace with stray punctuation, and a quoted word.
+	  /// </summary>
+	  public virtual void testVariousTextSA()
+	  {
+		// various
+		string[] inputs = new string[]
+		{
+		  "C embedded developers wanted",
+		  "foo bar FOO BAR",
+		  "foo      bar .  FOO <> BAR",
+		  "\"QUOTED\" word"
+		};
+		string[][] expectedTokens = new string[][]
+		{
+		  new string[]{"C", "embedded", "developers", "wanted"},
+		  new string[]{"foo", "bar", "FOO", "BAR"},
+		  new string[]{"foo", "bar", "FOO", "BAR"},
+		  new string[]{"QUOTED", "word"}
+		};
+		for (int i = 0; i < inputs.Length; i++)
+		{
+		  BaseTokenStreamTestCase.assertAnalyzesTo(a, inputs[i], expectedTokens[i]);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testKoreanSA() throws Exception
+	  /// <summary>
+	  /// Asserts that <c>a</c> returns two space-separated Korean words as two tokens.
+	  /// </summary>
+	  public virtual void testKoreanSA()
+	  {
+		// Korean words
+		string phrase = "안녕하세요 한글입니다";
+		string[] expectedTokens = new string[]{"안녕하세요", "한글입니다"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, phrase, expectedTokens);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testOffsets() throws Exception
+	  /// <summary>
+	  /// Asserts the tokens of "David has 5000 bones" together with two integer arrays whose
+	  /// values line up with each word's start and end character positions in the input.
+	  /// </summary>
+	  public virtual void testOffsets()
+	  {
+		string sentence = "David has 5000 bones";
+		string[] expectedTokens = new string[] {"David", "has", "5000", "bones"};
+		int[] expectedStarts = new int[] {0, 6, 10, 15};
+		int[] expectedEnds = new int[] {5, 9, 14, 20};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, sentence, expectedTokens, expectedStarts, expectedEnds);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testTypes() throws Exception
+	  /// <summary>
+	  /// Asserts the tokens of "David has 5000 bones" together with a per-token label:
+	  /// "&lt;ALPHANUM&gt;" for the words and "&lt;NUM&gt;" for the number.
+	  /// </summary>
+	  public virtual void testTypes()
+	  {
+		string sentence = "David has 5000 bones";
+		string[] expectedTokens = new string[] {"David", "has", "5000", "bones"};
+		string[] expectedTypes = new string[] {"<ALPHANUM>", "<ALPHANUM>", "<NUM>", "<ALPHANUM>"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, sentence, expectedTokens, expectedTypes);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testUnicodeWordBreaks() throws Exception
+	  /// <summary>
+	  /// Runs <c>WordBreakTestUnicode_6_3_0.test</c> against <c>a</c>.
+	  /// </summary>
+	  public virtual void testUnicodeWordBreaks()
+	  {
+		new WordBreakTestUnicode_6_3_0().test(a);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testSupplementary() throws Exception
+	  /// <summary>
+	  /// Asserts that <c>a</c> emits each character of an ideographic sample as a separate
+	  /// token labelled "&lt;IDEOGRAPHIC&gt;".
+	  /// </summary>
+	  public virtual void testSupplementary()
+	  {
+		string sample = "𩬅艱鍟䇹愯瀛";
+		string[] expectedTokens = new string[] {"𩬅", "艱", "鍟", "䇹", "愯", "瀛"};
+		string[] expectedTypes = new string[] {"<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, sample, expectedTokens, expectedTypes);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testKorean() throws Exception
+	  /// <summary>
+	  /// Asserts that <c>a</c> returns a Korean word as one token labelled "&lt;HANGUL&gt;".
+	  /// </summary>
+	  public virtual void testKorean()
+	  {
+		string word = "훈민정음";
+		string[] expectedTokens = new string[] {"훈민정음"};
+		string[] expectedTypes = new string[] {"<HANGUL>"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, word, expectedTokens, expectedTypes);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testJapanese() throws Exception
+	  /// <summary>
+	  /// Asserts the tokens and per-token labels that <c>a</c> produces for a Japanese
+	  /// sample: "&lt;IDEOGRAPHIC&gt;", "&lt;HIRAGANA&gt;" and "&lt;KATAKANA&gt;".
+	  /// </summary>
+	  public virtual void testJapanese()
+	  {
+		string sample = "仮名遣い カタカナ";
+		string[] expectedTokens = new string[] {"仮", "名", "遣", "い", "カタカナ"};
+		string[] expectedTypes = new string[] {"<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, sample, expectedTokens, expectedTypes);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testCombiningMarks() throws Exception
+	  /// <summary>
+	  /// Calls <c>checkOneTerm</c> on <c>a</c> for four inputs that carry a combining mark,
+	  /// one each for hiragana, katakana, ideographic and hangul.
+	  /// </summary>
+	  public virtual void testCombiningMarks()
+	  {
+		// Each row holds the input followed by the single expected term.
+		string[][] cases = new string[][]
+		{
+		  new string[] {"ざ", "ざ"}, // hiragana
+		  new string[] {"ザ", "ザ"}, // katakana
+		  new string[] {"壹゙", "壹゙"}, // ideographic
+		  new string[] {"아゙", "아゙"} // hangul
+		};
+		foreach (string[] pair in cases)
+		{
+		  checkOneTerm(a, pair[0], pair[1]);
+		}
+	  }
+
+	  /// <summary>
+	  /// A lone char from \p{WB:MidLetter}, \p{WB:MidNumLet} or \p{WB:MidNum} sitting
+	  /// between suitable neighbours stays inside the token; multiple consecutive
+	  /// chars from those classes should trigger a token split.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testMid() throws Exception
+	  public virtual void testMid()
+	  {
+		// One row per check, in execution order: { input, expected token(s)... }.
+		string[][] rows = new string[][]
+		{
+		  // ':' is in \p{WB:MidLetter}: splits unless there is a Letter char on both sides
+		  new string[] {"A:B", "A:B"},
+		  new string[] {"A::B", "A", "B"},
+
+		  // '.' is in \p{WB:MidNumLet}: splits unless there is a Letter or Numeric char on both sides
+		  new string[] {"1.2", "1.2"},
+		  new string[] {"A.B", "A.B"},
+		  new string[] {"1..2", "1", "2"},
+		  new string[] {"A..B", "A", "B"},
+
+		  // ',' is in \p{WB:MidNum}: splits unless there is a Numeric char on both sides
+		  new string[] {"1,2", "1,2"},
+		  new string[] {"1,,2", "1", "2"},
+
+		  // mixed consecutive \p{WB:MidLetter} and \p{WB:MidNumLet} split
+		  new string[] {"A.:B", "A", "B"},
+		  new string[] {"A:.B", "A", "B"},
+
+		  // mixed consecutive \p{WB:MidNum} and \p{WB:MidNumLet} split
+		  new string[] {"1,.2", "1", "2"},
+		  new string[] {"1.,2", "1", "2"},
+
+		  // same checks again with '_' (\p{WB:ExtendNumLet}) joining a prefix onto the token
+		  new string[] {"A:B_A:B", "A:B_A:B"},
+		  new string[] {"A:B_A::B", "A:B_A", "B"},
+
+		  new string[] {"1.2_1.2", "1.2_1.2"},
+		  new string[] {"A.B_A.B", "A.B_A.B"},
+		  new string[] {"1.2_1..2", "1.2_1", "2"},
+		  new string[] {"A.B_A..B", "A.B_A", "B"},
+
+		  new string[] {"1,2_1,2", "1,2_1,2"},
+		  new string[] {"1,2_1,,2", "1,2_1", "2"},
+
+		  new string[] {"C_A.:B", "C_A", "B"},
+		  new string[] {"C_A:.B", "C_A", "B"},
+
+		  new string[] {"3_1,.2", "3_1", "2"},
+		  new string[] {"3_1.,2", "3_1", "2"}
+		};
+
+		foreach (string[] row in rows)
+		{
+		  // everything after the first element is the expected token list
+		  string[] expectedTokens = new string[row.Length - 1];
+		  for (int k = 0; k < expectedTokens.Length; k++)
+		  {
+			expectedTokens[k] = row[k + 1];
+		  }
+		  BaseTokenStreamTestCase.assertAnalyzesTo(a, row[0], expectedTokens);
+		}
+	  }
+
+
+	  /// <summary>
+	  /// Runs the same four inputs as testCombiningMarks through a StandardAnalyzer created
+	  /// for Version.LUCENE_33. The expectations pin that version's output, including the
+	  /// two cases the inline comments label "Bug", where the expected term differs from the input.
+	  /// </summary>
+	  /// @deprecated remove this and sophisticated backwards layer in 5.0 
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Deprecated("remove this and sophisticated backwards layer in 5.0") public void testCombiningMarksBackwards() throws Exception
+	  [Obsolete("remove this and sophisticated backwards layer in 5.0")]
+	  public virtual void testCombiningMarksBackwards()
+	  {
+		// local analyzer; takes precedence over the class-level 'a' used by the other tests
+		Analyzer a = new StandardAnalyzer(Version.LUCENE_33);
+		checkOneTerm(a, "ざ", "さ"); // hiragana Bug
+		checkOneTerm(a, "ザ", "ザ"); // katakana Works
+		checkOneTerm(a, "壹゙", "壹"); // ideographic Bug
+		checkOneTerm(a, "아゙", "아゙"); // hangul Works
+	  }
+
+	  /// <summary>
+	  /// Sanity check for a StandardTokenizer pinned to Version.LUCENE_36 (older Unicode data).
+	  /// U+08E6, described below as a combining mark new in Unicode 6.1, is expected to
+	  /// break "t\u08E6st" into "t" and "st"; the e-mail address is expected as
+	  /// "lucene" and "apache.org".
+	  /// </summary>
+	  /// @deprecated uses older unicode (6.0). simple test to make sure its basically working 
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Deprecated("uses older unicode (6.0). simple test to make sure its basically working") public void testVersion36() throws Exception
+	  [Obsolete("uses older unicode (6.0). simple test to make sure its basically working")]
+	  public virtual void testVersion36()
+	  {
+		Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this);
+		assertAnalyzesTo(a, "this is just a t\u08E6st lucene@apache.org", new string[] {"this", "is", "just", "a", "t", "st", "lucene", "apache.org"}); // new combining mark in 6.1
+	  }
+
+	  /// <summary>
+	  /// Analyzer whose only component is a StandardTokenizer created for Version.LUCENE_36.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+	  {
+		  // enclosing test instance; stored by the constructor, not read inside this class
+		  private readonly TestStandardAnalyzer outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper2(TestStandardAnalyzer outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  // Builds the tokenizer-only component chain; fieldName is not consulted.
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer source = new StandardTokenizer(Version.LUCENE_36, reader);
+			TokenStreamComponents components = new TokenStreamComponents(source);
+			return components;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Sanity check for a StandardTokenizer pinned to Version.LUCENE_40 (older Unicode data).
+	  /// U+061C, described below as a combining mark new in Unicode 6.3, is expected to
+	  /// break "t\u061Cst" into "t" and "st"; the e-mail address is expected as
+	  /// "lucene" and "apache.org".
+	  /// </summary>
+	  /// @deprecated uses older unicode (6.1). simple test to make sure its basically working 
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Deprecated("uses older unicode (6.1). simple test to make sure its basically working") public void testVersion40() throws Exception
+	  [Obsolete("uses older unicode (6.1). simple test to make sure its basically working")]
+	  public virtual void testVersion40()
+	  {
+		Analyzer a = new AnalyzerAnonymousInnerClassHelper3(this);
+		// U+061C is a new combining mark in 6.3, found using "[[\p{WB:Format}\p{WB:Extend}]&[^\p{Age:6.2}]]"
+		// on the online UnicodeSet utility: <http://unicode.org/cldr/utility/list-unicodeset.jsp>
+		assertAnalyzesTo(a, "this is just a t\u061Cst lucene@apache.org", new string[] {"this", "is", "just", "a", "t", "st", "lucene", "apache.org"});
+	  }
+
+	  /// <summary>
+	  /// Analyzer whose only component is a StandardTokenizer created for Version.LUCENE_40.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+	  {
+		  // enclosing test instance; stored by the constructor, not read inside this class
+		  private readonly TestStandardAnalyzer outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper3(TestStandardAnalyzer outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  // Builds the tokenizer-only component chain; fieldName is not consulted.
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer source = new StandardTokenizer(Version.LUCENE_40, reader);
+			TokenStreamComponents components = new TokenStreamComponents(source);
+			return components;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Pushes random strings through a StandardAnalyzer built for TEST_VERSION_CURRENT,
+	  /// using checkRandomData with 1000 * RANDOM_MULTIPLIER as its last argument.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomStrings() throws Exception
+	  public virtual void testRandomStrings()
+	  {
+		var rnd = random();
+		var analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT);
+		checkRandomData(rnd, analyzer, 1000 * RANDOM_MULTIPLIER);
+	  }
+
+	  /// <summary>
+	  /// blast some random large strings through the analyzer: checkRandomData is run on a
+	  /// StandardAnalyzer with 100 * RANDOM_MULTIPLIER and 8192 as trailing arguments
+	  /// (NOTE(review): presumably iteration count and maximum string length - confirm).
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomHugeStrings() throws Exception
+	  public virtual void testRandomHugeStrings()
+	  {
+		// The local must not be named 'random': in C# a local is already in scope inside its
+		// own initializer, so 'Random random = random();' binds the call to the local instead
+		// of the random() test helper and does not compile.
+		Random rnd = random();
+		checkRandomData(rnd, new StandardAnalyzer(TEST_VERSION_CURRENT), 100 * RANDOM_MULTIPLIER, 8192);
+	  }
+
+	  /// <summary>
+	  /// Same random-data run as testRandomHugeStrings, but the analyzer under test is
+	  /// AnalyzerAnonymousInnerClassHelper4, which adds a random graph filter after the tokenizer.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomHugeStringsGraphAfter() throws Exception
+	  public virtual void testRandomHugeStringsGraphAfter()
+	  {
+		// The local must not be named 'random': in C# a local is already in scope inside its
+		// own initializer, so 'Random random = random();' binds the call to the local instead
+		// of the random() test helper and does not compile.
+		Random rnd = random();
+		checkRandomData(rnd, new AnalyzerAnonymousInnerClassHelper4(this), 100 * RANDOM_MULTIPLIER, 8192);
+	  }
+
+	  /// <summary>
+	  /// Analyzer that chains a StandardTokenizer (TEST_VERSION_CURRENT) into a
+	  /// MockGraphTokenFilter constructed with random().
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper4 : Analyzer
+	  {
+		  // enclosing test instance; stored by the constructor, not read inside this class
+		  private readonly TestStandardAnalyzer outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper4(TestStandardAnalyzer outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  // Tokenizer first, graph filter wrapped around it; both are handed to the components.
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+			TokenStream graph = new MockGraphTokenFilter(random(), source);
+			return new TokenStreamComponents(source, graph);
+		  }
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStopAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStopAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStopAnalyzer.cs
new file mode 100644
index 0000000..433692f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStopAnalyzer.cs
@@ -0,0 +1,134 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.core
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+
+
+	public class TestStopAnalyzer : BaseTokenStreamTestCase
+	{
+
+	  private StopAnalyzer stop = new StopAnalyzer(TEST_VERSION_CURRENT);
+	  private ISet<object> inValidTokens = new HashSet<object>();
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void setUp() throws Exception
+	  /// <summary>
+	  /// Runs the base fixture setup, then copies every entry of
+	  /// StopAnalyzer.ENGLISH_STOP_WORDS_SET into inValidTokens.
+	  /// </summary>
+	  public override void setUp()
+	  {
+		base.setUp();
+
+		// Java's wildcard Iterator<?> has no C# spelling ('IEnumerator<?>' does not compile);
+		// foreach drives the set's enumerator without having to name its element type.
+		foreach (object stopWord in StopAnalyzer.ENGLISH_STOP_WORDS_SET)
+		{
+		  inValidTokens.Add(stopWord);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testDefaults() throws java.io.IOException
+	  /// <summary>
+	  /// Tokenizes a sample sentence with the default StopAnalyzer and asserts that no
+	  /// emitted term is contained in the inValidTokens set filled by setUp.
+	  /// </summary>
+	  public virtual void testDefaults()
+	  {
+		assertTrue(stop != null);
+		TokenStream ts = stop.tokenStream("test", "This is a test of the english stop analyzer");
+		try
+		{
+		  assertTrue(ts != null);
+		  CharTermAttribute term = ts.getAttribute(typeof(CharTermAttribute));
+		  ts.reset();
+
+		  while (true)
+		  {
+			if (!ts.incrementToken())
+			{
+			  break;
+			}
+			string emitted = term.ToString();
+			assertFalse(inValidTokens.Contains(emitted));
+		  }
+		  ts.end();
+		}
+		finally
+		{
+		  // closed even when an assertion above fails
+		  IOUtils.closeWhileHandlingException(ts);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testStopList() throws java.io.IOException
+	  /// <summary>
+	  /// Builds a StopAnalyzer over the custom stop set {"good", "test", "analyzer"} and
+	  /// asserts that none of the terms it emits for the sample sentence is in that set.
+	  /// </summary>
+	  public virtual void testStopList()
+	  {
+		CharArraySet customStops = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false);
+		StopAnalyzer analyzer = new StopAnalyzer(TEST_VERSION_CURRENT, customStops);
+		TokenStream ts = analyzer.tokenStream("test", "This is a good test of the english stop analyzer");
+		try
+		{
+		  assertNotNull(ts);
+		  CharTermAttribute term = ts.getAttribute(typeof(CharTermAttribute));
+
+		  ts.reset();
+		  while (true)
+		  {
+			if (!ts.incrementToken())
+			{
+			  break;
+			}
+			string emitted = term.ToString();
+			assertFalse(customStops.contains(emitted));
+		  }
+		  ts.end();
+		}
+		finally
+		{
+		  // closed even when an assertion above fails
+		  IOUtils.closeWhileHandlingException(ts);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testStopListPositions() throws java.io.IOException
+	  /// <summary>
+	  /// Like testStopList, but additionally compares each surviving token's position
+	  /// increment against expectedIncr, consumed in emission order.
+	  /// </summary>
+	  public virtual void testStopListPositions()
+	  {
+		CharArraySet customStops = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false);
+		StopAnalyzer analyzer = new StopAnalyzer(TEST_VERSION_CURRENT, customStops);
+		string sentence = "This is a good test of the english stop analyzer with positions";
+		int[] expectedIncr = {1, 1, 1, 3, 1, 1, 1, 2, 1};
+		TokenStream ts = analyzer.tokenStream("test", sentence);
+		try
+		{
+		  assertNotNull(ts);
+		  int seen = 0;
+		  CharTermAttribute term = ts.getAttribute(typeof(CharTermAttribute));
+		  PositionIncrementAttribute posIncr = ts.addAttribute(typeof(PositionIncrementAttribute));
+
+		  ts.reset();
+		  while (ts.incrementToken())
+		  {
+			string emitted = term.ToString();
+			assertFalse(customStops.contains(emitted));
+
+			// look up the expectation first, then advance the cursor, then compare
+			int wanted = expectedIncr[seen];
+			seen++;
+			assertEquals(wanted,posIncr.PositionIncrement);
+		  }
+		  ts.end();
+		}
+		finally
+		{
+		  // closed even when an assertion above fails
+		  IOUtils.closeWhileHandlingException(ts);
+		}
+	  }
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStopFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStopFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStopFilter.cs
new file mode 100644
index 0000000..b1923ed
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStopFilter.cs
@@ -0,0 +1,243 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace org.apache.lucene.analysis.core
+{
+
+	/// <summary>
+	/// Copyright 2005 The Apache Software Foundation
+	/// 
+	/// Licensed under the Apache License, Version 2.0 (the "License");
+	/// you may not use this file except in compliance with the License.
+	/// You may obtain a copy of the License at
+	/// 
+	///     http://www.apache.org/licenses/LICENSE-2.0
+	/// 
+	/// Unless required by applicable law or agreed to in writing, software
+	/// distributed under the License is distributed on an "AS IS" BASIS,
+	/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	/// See the License for the specific language governing permissions and
+	/// limitations under the License.
+	/// </summary>
+
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using English = org.apache.lucene.util.English;
+	using Version = org.apache.lucene.util.Version;
+
+
+	public class TestStopFilter : BaseTokenStreamTestCase
+	{
+
+	  // other StopFilter functionality is already tested by TestStopAnalyzer
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testExactCase() throws java.io.IOException
+	  /// <summary>
+	  /// Filters "Now is The Time" against a CharArraySet holding "is", "the", "Time"
+	  /// (built with 'false' as its last constructor argument) and expects "Now" and "The"
+	  /// to survive, i.e. "The" is not matched by the lower-case entry "the".
+	  /// </summary>
+	  public virtual void testExactCase()
+	  {
+		StringReader input = new StringReader("Now is The Time");
+		CharArraySet stopSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("is", "the", "Time"), false);
+		MockTokenizer source = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
+		TokenStream filtered = new StopFilter(TEST_VERSION_CURRENT, source, stopSet);
+		string[] survivors = {"Now", "The"};
+		assertTokenStreamContents(filtered, survivors);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testStopFilt() throws java.io.IOException
+	  /// <summary>
+	  /// Same scenario as testExactCase, except the stop set is produced by
+	  /// StopFilter.makeStopSet from a string array; "Now" and "The" are expected to survive.
+	  /// </summary>
+	  public virtual void testStopFilt()
+	  {
+		StringReader input = new StringReader("Now is The Time");
+		string[] words = {"is", "the", "Time"};
+		CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, words);
+		MockTokenizer source = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
+		TokenStream filtered = new StopFilter(TEST_VERSION_CURRENT, source, stopSet);
+		string[] survivors = {"Now", "The"};
+		assertTokenStreamContents(filtered, survivors);
+	  }
+
+	  /// <summary>
+	  /// Test Position increments applied by StopFilter with and without enabling this option.
+	  /// The input is the English words for 0..19; every word whose index is not a multiple
+	  /// of 3 is a stop word. Three filters are checked through doTestStopPositons: one with
+	  /// increments, one without, and one made of two chained StopFilters that each hold
+	  /// half of the stop words.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testStopPositons() throws java.io.IOException
+	  public virtual void testStopPositons()
+	  {
+		StringBuilder sb = new StringBuilder();
+		List<string> a = new List<string>();
+		for (int i = 0; i < 20; i++)
+		{
+		  // System.String offers Trim(); the Java-style trim() left by the converter does not exist on it
+		  string w = English.intToEnglish(i).Trim();
+		  sb.Append(w).Append(" ");
+		  if (i % 3 != 0)
+		  {
+			  a.Add(w);
+		  }
+		}
+		log(sb.ToString());
+		string[] stopWords = a.ToArray();
+		foreach (string stopWord in stopWords)
+		{
+			log("Stop: " + stopWord);
+		}
+		CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
+		// with increments
+		StringReader reader = new StringReader(sb.ToString());
+		StopFilter stpf = new StopFilter(Version.LUCENE_40, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
+		doTestStopPositons(stpf,true);
+		// without increments
+		reader = new StringReader(sb.ToString());
+		stpf = new StopFilter(Version.LUCENE_43, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
+		doTestStopPositons(stpf,false);
+		// with increments, concatenating two stop filters:
+		// even-indexed stop words go to the first filter, odd-indexed ones to the second
+		List<string> a0 = new List<string>();
+		List<string> a1 = new List<string>();
+		for (int i = 0; i < a.Count; i++)
+		{
+		  if (i % 2 == 0)
+		  {
+			a0.Add(a[i]);
+		  }
+		  else
+		  {
+			a1.Add(a[i]);
+		  }
+		}
+		string[] stopWords0 = a0.ToArray();
+		foreach (string stopWord in stopWords0)
+		{
+			log("Stop0: " + stopWord);
+		}
+		string[] stopWords1 = a1.ToArray();
+		foreach (string stopWord in stopWords1)
+		{
+			log("Stop1: " + stopWord);
+		}
+		CharArraySet stopSet0 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords0);
+		CharArraySet stopSet1 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords1);
+		reader = new StringReader(sb.ToString());
+		StopFilter stpf0 = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet0); // first part of the set
+		stpf0.EnablePositionIncrements = true;
+		StopFilter stpf01 = new StopFilter(TEST_VERSION_CURRENT, stpf0, stopSet1); // two stop filters concatenated!
+		doTestStopPositons(stpf01,true);
+	  }
+
+	  // LUCENE-3849: make sure after .end() we see the "ending" posInc
+	  // ("test of" with stop word "of": only "test" is expected to be emitted)
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testEndStopword() throws Exception
+	  public virtual void testEndStopword()
+	  {
+		CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "of");
+		MockTokenizer source = new MockTokenizer(new StringReader("test of"), MockTokenizer.WHITESPACE, false);
+		StopFilter stpf = new StopFilter(TEST_VERSION_CURRENT, source, stopSet);
+
+		string[] terms = {"test"};
+		int[] startOffsets = {0};
+		int[] endOffsets = {4};
+		int[] posIncrements = {1};
+		assertTokenStreamContents(stpf, terms, startOffsets, endOffsets, null, posIncrements, null, 7, 1, null, true);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void doTestStopPositons(StopFilter stpf, boolean enableIcrements) throws java.io.IOException
+	  /// <summary>
+	  /// Drives <paramref name="stpf"/> over the "zero one two ..." input and checks that only
+	  /// the words for 0, 3, 6, ... 18 come out. With <paramref name="enableIcrements"/> true
+	  /// the first token must have position increment 1 and every later one 3; with false
+	  /// every token must have increment 1. The stream is reset, ended and closed here.
+	  /// </summary>
+	  private void doTestStopPositons(StopFilter stpf, bool enableIcrements)
+	  {
+		log("---> test with enable-increments-" + (enableIcrements?"enabled":"disabled"));
+		stpf.EnablePositionIncrements = enableIcrements;
+		CharTermAttribute termAtt = stpf.getAttribute(typeof(CharTermAttribute));
+		PositionIncrementAttribute posIncrAtt = stpf.getAttribute(typeof(PositionIncrementAttribute));
+		stpf.reset();
+		for (int i = 0; i < 20; i += 3)
+		{
+		  assertTrue(stpf.incrementToken());
+		  log("Token " + i + ": " + stpf);
+		  // System.String offers Trim(); the Java-style trim() left by the converter does not exist on it
+		  string w = English.intToEnglish(i).Trim();
+		  assertEquals("expecting token " + i + " to be " + w,w,termAtt.ToString());
+		  // two stop words are skipped before every surviving token except the very first one
+		  int expectedIncrement = enableIcrements?(i == 0?1:3):1;
+		  assertEquals("all but first token must have position increment of 3",expectedIncrement,posIncrAtt.PositionIncrement);
+		}
+		assertFalse(stpf.incrementToken());
+		stpf.end();
+		stpf.close();
+	  }
+
+	  // Writes s to the console, but only when the VERBOSE flag is set; otherwise does nothing.
+	  private static void log(string s)
+	  {
+		if (!VERBOSE)
+		{
+		  return;
+		}
+		Console.WriteLine(s);
+	  }
+
+	  // stupid filter that inserts synonym of 'hte' for 'the':
+	  // every "the" token coming from the input is passed through unchanged and is then
+	  // followed by an extra "hte" token with position increment 0.
+	  private class MockSynonymFilter : TokenFilter
+	  {
+		  private readonly TestStopFilter outerInstance;
+
+		// captured copy of the "the" token, pending emission of its synonym; null when nothing is pending
+		internal State bufferedState;
+		internal CharTermAttribute termAtt;
+		internal PositionIncrementAttribute posIncAtt;
+
+		internal MockSynonymFilter(TestStopFilter outerInstance, TokenStream input) : base(input)
+		{
+			this.outerInstance = outerInstance;
+			// Attributes are registered here rather than in field initializers: a C# field
+			// initializer may not call an instance method such as addAttribute (CS0236).
+			termAtt = addAttribute(typeof(CharTermAttribute));
+			posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
+		}
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+		public override bool incrementToken()
+		{
+		  if (bufferedState != null)
+		  {
+			// a "the" was returned by the previous call: replay its state as the synonym
+			restoreState(bufferedState);
+			posIncAtt.PositionIncrement = 0;
+			termAtt.setEmpty().append("hte");
+			bufferedState = null;
+			return true;
+		  }
+		  else if (input.incrementToken())
+		  {
+			if (termAtt.ToString().Equals("the"))
+			{
+			  // remember this token so the next call can emit "hte" for it
+			  bufferedState = captureState();
+			}
+			return true;
+		  }
+		  else
+		  {
+			return false;
+		  }
+		}
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+		public override void reset()
+		{
+		  base.reset();
+		  // drop any pending synonym
+		  bufferedState = null;
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testFirstPosInc() throws Exception
+	  /// <summary>
+	  /// Analyzes "the quick brown fox" with AnalyzerAnonymousInnerClassHelper and expects
+	  /// the terms "hte", "quick", "brown", "fox", each with position increment 1.
+	  /// </summary>
+	  public virtual void testFirstPosInc()
+	  {
+		Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);
+
+		string[] expectedTerms = {"hte", "quick", "brown", "fox"};
+		int[] expectedPosIncs = {1, 1, 1, 1};
+		assertAnalyzesTo(analyzer, "the quick brown fox", expectedTerms, expectedPosIncs);
+	  }
+
+	  /// <summary>
+	  /// Analyzer chain used by testFirstPosInc: whitespace MockTokenizer, then
+	  /// MockSynonymFilter, then a Version.LUCENE_43 StopFilter over the English stop
+	  /// words with EnablePositionIncrements switched off.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
+	  {
+		  // enclosing test instance; handed on to MockSynonymFilter
+		  private readonly TestStopFilter outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper(TestStopFilter outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+			TokenFilter synonyms = new MockSynonymFilter(outerInstance, source);
+			StopFilter stops = new StopFilter(Version.LUCENE_43, synonyms, StopAnalyzer.ENGLISH_STOP_WORDS_SET)
+			{
+			  EnablePositionIncrements = false
+			};
+			return new TokenStreamComponents(source, stops);
+		  }
+	  }
+	}
+
+}
\ No newline at end of file


Mime
View raw message