lucenenet-commits mailing list archives

From synhers...@apache.org
Subject [01/27] lucenenet git commit: adding converted analysis common tests
Date Thu, 10 Dec 2015 18:38:50 GMT
Repository: lucenenet
Updated Branches:
  refs/heads/master 812e1c541 -> 092aab40f


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerTest.cs
new file mode 100644
index 0000000..ff0bfd4
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerTest.cs
@@ -0,0 +1,180 @@
+using System;
+using System.Collections.Generic;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+namespace org.apache.lucene.analysis.wikipedia
+{
+
+
+	using FlagsAttribute = org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.wikipedia.WikipediaTokenizer.*;
+
+	/// <summary>
+	/// Basic Tests for <seealso cref="WikipediaTokenizer"/>
+	/// 
+	/// </summary>
+	public class WikipediaTokenizerTest : BaseTokenStreamTestCase
+	{
+	  protected internal const string LINK_PHRASES = "click [[link here again]] click [http://lucene.apache.org here again] [[Category:a b c d]]";
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testSimple() throws Exception
+	  public virtual void testSimple()
+	  {
+		string text = "This is a [[Category:foo]]";
+		WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(text));
+		assertTokenStreamContents(tf, new string[] {"This", "is", "a", "foo"}, new int[] {0, 5, 8, 21}, new int[] {4, 7, 9, 24}, new string[] {"<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", CATEGORY}, new int[] {1, 1, 1, 1}, text.Length);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testHandwritten() throws Exception
+	  public virtual void testHandwritten()
+	  {
+		// make sure all tokens are in only one type
+		string test = "[[link]] This is a [[Category:foo]] Category  This is a linked [[:Category:bar none withstanding]] " + "Category This is (parens) This is a [[link]]  This is an external URL [http://lucene.apache.org] " + "Here is ''italics'' and ''more italics'', '''bold''' and '''''five quotes''''' " + " This is a [[link|display info]]  This is a period.  Here is $3.25 and here is 3.50.  Here's Johnny.  " + "==heading== ===sub head=== followed by some text  [[Category:blah| ]] " + "''[[Category:ital_cat]]''  here is some that is ''italics [[Category:foo]] but is never closed." + "'''same [[Category:foo]] goes for this '''''and2 [[Category:foo]] and this" + " [http://foo.boo.com/test/test/ Test Test] [http://foo.boo.com/test/test/test.html Test Test]" + " [http://foo.boo.com/test/test/test.html?g=b&c=d Test Test] <ref>Citation</ref> <sup>martian</sup> <span class=\"glue\">code</span>";
+
+		WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test));
+		assertTokenStreamContents(tf, new string[] {"link", "This", "is", "a", "foo", "Category", "This", "is", "a", "linked", "bar", "none", "withstanding", "Category", "This", "is", "parens", "This", "is", "a", "link", "This", "is", "an", "external", "URL", "http://lucene.apache.org", "Here", "is", "italics", "and", "more", "italics", "bold", "and", "five", "quotes", "This", "is", "a", "link", "display", "info", "This", "is", "a", "period", "Here", "is", "3.25", "and", "here", "is", "3.50", "Here's", "Johnny", "heading", "sub", "head", "followed", "by", "some", "text", "blah", "ital", "cat", "here", "is", "some", "that", "is", "italics", "foo", "but", "is", "never", "closed", "same", "foo", "goes", "for", "this", "and2", "foo", "and", "this", "http://foo.boo.com/test/test/", "Test", "Test", "http://foo.boo.com/test/test/test.html", "Test", "Test", "http://foo.boo.com/test/test/test.html?g=b&c=d", "Test", "Test", "Citation", "martian", "code"}, new string[] {INTERNAL_LINK, "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", CATEGORY, "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", CATEGORY, CATEGORY, CATEGORY, "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", INTERNAL_LINK, "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", EXTERNAL_LINK_URL, "<ALPHANUM>", "<ALPHANUM>", ITALICS, "<ALPHANUM>", ITALICS, ITALICS, BOLD, "<ALPHANUM>", BOLD_ITALICS, BOLD_ITALICS, "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", INTERNAL_LINK, INTERNAL_LINK, INTERNAL_LINK, "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<NUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<NUM>", "<APOSTROPHE>", "<ALPHANUM>", HEADING, SUB_HEADING, SUB_HEADING, "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", CATEGORY, CATEGORY, CATEGORY, "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", ITALICS, CATEGORY, "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", BOLD, CATEGORY, "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", BOLD_ITALICS, CATEGORY, "<ALPHANUM>", "<ALPHANUM>", EXTERNAL_LINK_URL, EXTERNAL_LINK, EXTERNAL_LINK, EXTERNAL_LINK_URL, EXTERNAL_LINK, EXTERNAL_LINK, EXTERNAL_LINK_URL, EXTERNAL_LINK, EXTERNAL_LINK, CITATION, "<ALPHANUM>", "<ALPHANUM>"});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLinkPhrases() throws Exception
+	  public virtual void testLinkPhrases()
+	  {
+		WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(LINK_PHRASES));
+		checkLinkPhrases(tf);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void checkLinkPhrases(WikipediaTokenizer tf) throws java.io.IOException
+	  private void checkLinkPhrases(WikipediaTokenizer tf)
+	  {
+		assertTokenStreamContents(tf, new string[] {"click", "link", "here", "again", "click", "http://lucene.apache.org", "here", "again", "a", "b", "c", "d"}, new int[] {1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLinks() throws Exception
+	  public virtual void testLinks()
+	  {
+		string test = "[http://lucene.apache.org/java/docs/index.html#news here] [http://lucene.apache.org/java/docs/index.html?b=c here] [https://lucene.apache.org/java/docs/index.html?b=c here]";
+		WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test));
+		assertTokenStreamContents(tf, new string[] {"http://lucene.apache.org/java/docs/index.html#news", "here", "http://lucene.apache.org/java/docs/index.html?b=c", "here", "https://lucene.apache.org/java/docs/index.html?b=c", "here"}, new string[] {EXTERNAL_LINK_URL, EXTERNAL_LINK, EXTERNAL_LINK_URL, EXTERNAL_LINK, EXTERNAL_LINK_URL, EXTERNAL_LINK});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLucene1133() throws Exception
+	  public virtual void testLucene1133()
+	  {
+		ISet<string> untoks = new HashSet<string>();
+		untoks.Add(WikipediaTokenizer.CATEGORY);
+		untoks.Add(WikipediaTokenizer.ITALICS);
+		//should be exactly the same, regardless of untoks
+		WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(LINK_PHRASES), WikipediaTokenizer.TOKENS_ONLY, untoks);
+		checkLinkPhrases(tf);
+		string test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h   i   j]]";
+		tf = new WikipediaTokenizer(new StringReader(test), WikipediaTokenizer.UNTOKENIZED_ONLY, untoks);
+		assertTokenStreamContents(tf, new string[] {"a b c d", "e f g", "link", "here", "link", "there", "italics here", "something", "more italics", "h   i   j"}, new int[] {11, 32, 42, 47, 56, 61, 71, 86, 98, 124}, new int[] {18, 37, 46, 51, 60, 66, 83, 95, 110, 133}, new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBoth() throws Exception
+	  public virtual void testBoth()
+	  {
+		ISet<string> untoks = new HashSet<string>();
+		untoks.Add(WikipediaTokenizer.CATEGORY);
+		untoks.Add(WikipediaTokenizer.ITALICS);
+		string test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h   i   j]]";
+		//should output all the individual tokens plus the untokenized tokens as well.  Untokenized tokens
+		WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test), WikipediaTokenizer.BOTH, untoks);
+		assertTokenStreamContents(tf, new string[] {"a b c d", "a", "b", "c", "d", "e f g", "e", "f", "g", "link", "here", "link", "there", "italics here", "italics", "here", "something", "more italics", "more", "italics", "h   i   j", "h", "i", "j"}, new int[] {11, 11, 13, 15, 17, 32, 32, 34, 36, 42, 47, 56, 61, 71, 71, 79, 86, 98, 98, 103, 124, 124, 128, 132}, new int[] {18, 12, 14, 16, 18, 37, 33, 35, 37, 46, 51, 60, 66, 83, 78, 83, 95, 110, 102, 110, 133, 125, 129, 133}, new int[] {1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1});
+
+		// now check the flags, TODO: add way to check flags from BaseTokenStreamTestCase?
+		tf = new WikipediaTokenizer(new StringReader(test), WikipediaTokenizer.BOTH, untoks);
+		int[] expectedFlags = new int[] {UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, 0, 0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0};
+		FlagsAttribute flagsAtt = tf.addAttribute(typeof(FlagsAttribute));
+		tf.reset();
+		for (int i = 0; i < expectedFlags.Length; i++)
+		{
+		  assertTrue(tf.incrementToken());
+		  assertEquals("flags " + i, expectedFlags[i], flagsAtt.Flags);
+		}
+		assertFalse(tf.incrementToken());
+		tf.close();
+	  }
+
+	  /// <summary>
+	  /// blast some random strings through the analyzer </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomStrings() throws Exception
+	  public virtual void testRandomStrings()
+	  {
+		Analyzer a = new AnalyzerAnonymousInnerClassHelper(this);
+		checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
+	  }
+
+	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
+	  {
+		  private readonly WikipediaTokenizerTest outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper(WikipediaTokenizerTest outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer tokenizer = new WikipediaTokenizer(reader);
+			return new TokenStreamComponents(tokenizer, tokenizer);
+		  }
+	  }
+
+	  /// <summary>
+	  /// blast some random large strings through the analyzer </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomHugeStrings() throws Exception
+	  public virtual void testRandomHugeStrings()
+	  {
+		Random random = random();
+		Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this);
+		checkRandomData(random, a, 100 * RANDOM_MULTIPLIER, 8192);
+	  }
+
+	  private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+	  {
+		  private readonly WikipediaTokenizerTest outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper2(WikipediaTokenizerTest outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer tokenizer = new WikipediaTokenizer(reader);
+			return new TokenStreamComponents(tokenizer, tokenizer);
+		  }
+	  }
+	}
+
+}
\ No newline at end of file

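A note on where this conversion still needs hand work: the file above keeps the Java-style package namespace, camelCase test names, converter TODO markers, and the unresolved static import of the WikipediaTokenizer token-type constants. A minimal sketch of what one cleaned-up case could look like, assuming NUnit attributes and a Lucene.Net.Analysis.Wikipedia namespace are what the port eventually settles on (names here are illustrative, not part of this commit):

	using System.IO;
	using NUnit.Framework;
	using Lucene.Net.Analysis;            // assumed home of BaseTokenStreamTestCase
	using Lucene.Net.Analysis.Wikipedia;  // assumed final namespace for the tokenizer

	public class WikipediaTokenizerTestSketch : BaseTokenStreamTestCase
	{
		[Test]
		public void TestSimple()
		{
			const string text = "This is a [[Category:foo]]";
			var tokenizer = new WikipediaTokenizer(new StringReader(text));
			// Same expectations as testSimple above: tokens, start/end offsets,
			// token types, position increments, and the final offset.
			AssertTokenStreamContents(tokenizer,
				new[] { "This", "is", "a", "foo" },
				new[] { 0, 5, 8, 21 },
				new[] { 4, 7, 9, 24 },
				new[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", WikipediaTokenizer.CATEGORY },
				new[] { 1, 1, 1, 1 },
				text.Length);
		}
	}
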
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Collation/TestCollationKeyAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Collation/TestCollationKeyAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Collation/TestCollationKeyAnalyzer.cs
new file mode 100644
index 0000000..5f2f274
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Collation/TestCollationKeyAnalyzer.cs
@@ -0,0 +1,126 @@
+namespace org.apache.lucene.collation
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using Analyzer = org.apache.lucene.analysis.Analyzer;
+	using CollationTestBase = org.apache.lucene.analysis.CollationTestBase;
+	using BytesRef = org.apache.lucene.util.BytesRef;
+	using SuppressCodecs = org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+
+
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @SuppressCodecs("Lucene3x") public class TestCollationKeyAnalyzer extends org.apache.lucene.analysis.CollationTestBase
+	public class TestCollationKeyAnalyzer : CollationTestBase
+	{
+		private bool InstanceFieldsInitialized = false;
+
+		public TestCollationKeyAnalyzer()
+		{
+			if (!InstanceFieldsInitialized)
+			{
+				InitializeInstanceFields();
+				InstanceFieldsInitialized = true;
+			}
+		}
+
+		private void InitializeInstanceFields()
+		{
+			analyzer = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, collator);
+			firstRangeBeginning = new BytesRef(collator.getCollationKey(firstRangeBeginningOriginal).toByteArray());
+			firstRangeEnd = new BytesRef(collator.getCollationKey(firstRangeEndOriginal).toByteArray());
+			secondRangeBeginning = new BytesRef(collator.getCollationKey(secondRangeBeginningOriginal).toByteArray());
+			secondRangeEnd = new BytesRef(collator.getCollationKey(secondRangeEndOriginal).toByteArray());
+		}
+
+	  // the sort order of Ø versus U depends on the version of the rules being used
+	  // for the inherited root locale: Ø's order isn't specified in Locale.US since
+	  // it's not used in English.
+	  private bool oStrokeFirst = Collator.getInstance(new Locale("")).compare("Ø", "U") < 0;
+
+	  // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
+	  // RuleBasedCollator.  However, the Arabic Locale seems to order the Farsi
+	  // characters properly.
+	  private Collator collator = Collator.getInstance(new Locale("ar"));
+	  private Analyzer analyzer;
+
+	  private BytesRef firstRangeBeginning;
+	  private BytesRef firstRangeEnd;
+	  private BytesRef secondRangeBeginning;
+	  private BytesRef secondRangeEnd;
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testInitVars() throws Exception
+	  public virtual void testInitVars()
+	  {
+		CollationKey sortKey = collator.getCollationKey(firstRangeBeginningOriginal);
+		sbyte[] data = sortKey.toByteArray();
+		BytesRef r = new BytesRef(data);
+
+	  }
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testFarsiRangeFilterCollating() throws Exception
+	  public virtual void testFarsiRangeFilterCollating()
+	  {
+		testFarsiRangeFilterCollating(analyzer, firstRangeBeginning, firstRangeEnd, secondRangeBeginning, secondRangeEnd);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testFarsiRangeQueryCollating() throws Exception
+	  public virtual void testFarsiRangeQueryCollating()
+	  {
+		testFarsiRangeQueryCollating(analyzer, firstRangeBeginning, firstRangeEnd, secondRangeBeginning, secondRangeEnd);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testFarsiTermRangeQuery() throws Exception
+	  public virtual void testFarsiTermRangeQuery()
+	  {
+		testFarsiTermRangeQuery(analyzer, firstRangeBeginning, firstRangeEnd, secondRangeBeginning, secondRangeEnd);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testCollationKeySort() throws Exception
+	  public virtual void testCollationKeySort()
+	  {
+		Analyzer usAnalyzer = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(Locale.US));
+		Analyzer franceAnalyzer = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(Locale.FRANCE));
+		Analyzer swedenAnalyzer = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(new Locale("sv", "se")));
+		Analyzer denmarkAnalyzer = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(new Locale("da", "dk")));
+
+		// The ICU Collator and Sun java.text.Collator implementations differ in their
+		// orderings - "BFJDH" is the ordering for java.text.Collator for Locale.US.
+		testCollationKeySort(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, oStrokeFirst ? "BFJHD" : "BFJDH", "EACGI", "BJDFH", "BJDHF");
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testThreadSafe() throws Exception
+	  public virtual void testThreadSafe()
+	  {
+		int iters = 20 * RANDOM_MULTIPLIER;
+		for (int i = 0; i < iters; i++)
+		{
+		  Collator collator = Collator.getInstance(Locale.GERMAN);
+		  collator.Strength = Collator.PRIMARY;
+		  assertThreadSafe(new CollationKeyAnalyzer(TEST_VERSION_CURRENT, collator));
+		}
+	  }
+	}
+
+}
\ No newline at end of file

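The oStrokeFirst field above records whether "Ø" sorts before "U" under the root-locale rules, which varies with the collation-rule version in use. A standalone sketch of the same probe using only System.Globalization (illustrative; the port may keep a Collator abstraction here instead):

	using System;
	using System.Globalization;

	internal static class OStrokeProbe
	{
		internal static void Main()
		{
			// Mirrors Collator.getInstance(new Locale("")).compare("Ø", "U") < 0,
			// but against the invariant culture's collation rules.
			bool oStrokeFirst = CultureInfo.InvariantCulture.CompareInfo.Compare("Ø", "U") < 0;
			Console.WriteLine(oStrokeFirst ? "Ø sorts before U" : "Ø sorts at or after U");
		}
	}
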
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Collation/TestCollationKeyFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Collation/TestCollationKeyFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Collation/TestCollationKeyFilter.cs
new file mode 100644
index 0000000..47a48c6
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Collation/TestCollationKeyFilter.cs
@@ -0,0 +1,125 @@
+using System;
+
+namespace org.apache.lucene.collation
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using org.apache.lucene.analysis;
+	using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+	using BytesRef = org.apache.lucene.util.BytesRef;
+
+
+	/// @deprecated remove when CollationKeyFilter is removed. 
+	[Obsolete("remove when CollationKeyFilter is removed.")]
+	public class TestCollationKeyFilter : CollationTestBase
+	{
+		private bool InstanceFieldsInitialized = false;
+
+		public TestCollationKeyFilter()
+		{
+			if (!InstanceFieldsInitialized)
+			{
+				InitializeInstanceFields();
+				InstanceFieldsInitialized = true;
+			}
+		}
+
+		private void InitializeInstanceFields()
+		{
+			analyzer = new TestAnalyzer(this, collator);
+			firstRangeBeginning = new BytesRef(encodeCollationKey(collator.getCollationKey(firstRangeBeginningOriginal).toByteArray()));
+			firstRangeEnd = new BytesRef(encodeCollationKey(collator.getCollationKey(firstRangeEndOriginal).toByteArray()));
+			secondRangeBeginning = new BytesRef(encodeCollationKey(collator.getCollationKey(secondRangeBeginningOriginal).toByteArray()));
+			secondRangeEnd = new BytesRef(encodeCollationKey(collator.getCollationKey(secondRangeEndOriginal).toByteArray()));
+		}
+
+	  // the sort order of Ø versus U depends on the version of the rules being used
+	  // for the inherited root locale: Ø's order isn't specified in Locale.US since
+	  // it's not used in English.
+	  internal bool oStrokeFirst = Collator.getInstance(new Locale("")).compare("Ø", "U") < 0;
+
+	  // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
+	  // RuleBasedCollator.  However, the Arabic Locale seems to order the Farsi
+	  // characters properly.
+	  private Collator collator = Collator.getInstance(new Locale("ar"));
+	  private Analyzer analyzer;
+
+	  private BytesRef firstRangeBeginning;
+	  private BytesRef firstRangeEnd;
+	  private BytesRef secondRangeBeginning;
+	  private BytesRef secondRangeEnd;
+
+
+	  public sealed class TestAnalyzer : Analyzer
+	  {
+		  private readonly TestCollationKeyFilter outerInstance;
+
+		internal Collator _collator;
+
+		internal TestAnalyzer(TestCollationKeyFilter outerInstance, Collator collator)
+		{
+			this.outerInstance = outerInstance;
+		  _collator = collator;
+		}
+
+		public override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		{
+		  Tokenizer result = new KeywordTokenizer(reader);
+		  return new TokenStreamComponents(result, new CollationKeyFilter(result, _collator));
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testFarsiRangeFilterCollating() throws Exception
+	  public virtual void testFarsiRangeFilterCollating()
+	  {
+		testFarsiRangeFilterCollating(analyzer, firstRangeBeginning, firstRangeEnd, secondRangeBeginning, secondRangeEnd);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testFarsiRangeQueryCollating() throws Exception
+	  public virtual void testFarsiRangeQueryCollating()
+	  {
+		testFarsiRangeQueryCollating(analyzer, firstRangeBeginning, firstRangeEnd, secondRangeBeginning, secondRangeEnd);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testFarsiTermRangeQuery() throws Exception
+	  public virtual void testFarsiTermRangeQuery()
+	  {
+		testFarsiTermRangeQuery(analyzer, firstRangeBeginning, firstRangeEnd, secondRangeBeginning, secondRangeEnd);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testCollationKeySort() throws Exception
+	  public virtual void testCollationKeySort()
+	  {
+		Analyzer usAnalyzer = new TestAnalyzer(this, Collator.getInstance(Locale.US));
+		Analyzer franceAnalyzer = new TestAnalyzer(this, Collator.getInstance(Locale.FRANCE));
+		Analyzer swedenAnalyzer = new TestAnalyzer(this, Collator.getInstance(new Locale("sv", "se")));
+		Analyzer denmarkAnalyzer = new TestAnalyzer(this, Collator.getInstance(new Locale("da", "dk")));
+
+		// The ICU Collator and Sun java.text.Collator implementations differ in their
+		// orderings - "BFJDH" is the ordering for java.text.Collator for Locale.US.
+		testCollationKeySort(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, oStrokeFirst ? "BFJHD" : "BFJDH", "EACGI", "BJDFH", "BJDHF");
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Collation/TestCollationKeyFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Collation/TestCollationKeyFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Collation/TestCollationKeyFilterFactory.cs
new file mode 100644
index 0000000..72ae784
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Collation/TestCollationKeyFilterFactory.cs
@@ -0,0 +1,152 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.collation
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using MockTokenizer = org.apache.lucene.analysis.MockTokenizer;
+	using TokenStream = org.apache.lucene.analysis.TokenStream;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+	using StringMockResourceLoader = org.apache.lucene.analysis.util.StringMockResourceLoader;
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	public class TestCollationKeyFilterFactory : BaseTokenStreamFactoryTestCase
+	{
+
+	  /*
+	   * Turkish has some funny casing.
+	   * This test shows how you can solve this kind of thing easily with collation.
+	   * Instead of using LowerCaseFilter, use a Turkish collator with primary strength.
+	   * Then things will sort and match correctly.
+	   */
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBasicUsage() throws Exception
+	  public virtual void testBasicUsage()
+	  {
+		string turkishUpperCase = "I WİLL USE TURKİSH CASING";
+		string turkishLowerCase = "ı will use turkish casıng";
+		TokenFilterFactory factory = tokenFilterFactory("CollationKey", "language", "tr", "strength", "primary");
+		TokenStream tsUpper = factory.create(new MockTokenizer(new StringReader(turkishUpperCase), MockTokenizer.KEYWORD, false));
+		TokenStream tsLower = factory.create(new MockTokenizer(new StringReader(turkishLowerCase), MockTokenizer.KEYWORD, false));
+		assertCollatesToSame(tsUpper, tsLower);
+	  }
+
+	  /*
+	   * Test usage of the decomposition option for unicode normalization.
+	   */
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testNormalization() throws Exception
+	  public virtual void testNormalization()
+	  {
+		string turkishUpperCase = "I W\u0049\u0307LL USE TURKİSH CASING";
+		string turkishLowerCase = "ı will use turkish casıng";
+		TokenFilterFactory factory = tokenFilterFactory("CollationKey", "language", "tr", "strength", "primary", "decomposition", "canonical");
+		TokenStream tsUpper = factory.create(new MockTokenizer(new StringReader(turkishUpperCase), MockTokenizer.KEYWORD, false));
+		TokenStream tsLower = factory.create(new MockTokenizer(new StringReader(turkishLowerCase), MockTokenizer.KEYWORD, false));
+		assertCollatesToSame(tsUpper, tsLower);
+	  }
+
+	  /*
+	   * Test usage of the K decomposition option for unicode normalization.
+	   * This works even with identical strength.
+	   */
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testFullDecomposition() throws Exception
+	  public virtual void testFullDecomposition()
+	  {
+		string fullWidth = "Ｔｅｓｔｉｎｇ";
+		string halfWidth = "Testing";
+		TokenFilterFactory factory = tokenFilterFactory("CollationKey", "language", "zh", "strength", "identical", "decomposition", "full");
+		TokenStream tsFull = factory.create(new MockTokenizer(new StringReader(fullWidth), MockTokenizer.KEYWORD, false));
+		TokenStream tsHalf = factory.create(new MockTokenizer(new StringReader(halfWidth), MockTokenizer.KEYWORD, false));
+		assertCollatesToSame(tsFull, tsHalf);
+	  }
+
+	  /*
+	   * Test secondary strength; for English, case is not significant.
+	   */
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testSecondaryStrength() throws Exception
+	  public virtual void testSecondaryStrength()
+	  {
+		string upperCase = "TESTING";
+		string lowerCase = "testing";
+		TokenFilterFactory factory = tokenFilterFactory("CollationKey", "language", "en", "strength", "secondary", "decomposition", "no");
+		TokenStream tsUpper = factory.create(new MockTokenizer(new StringReader(upperCase), MockTokenizer.KEYWORD, false));
+		TokenStream tsLower = factory.create(new MockTokenizer(new StringReader(lowerCase), MockTokenizer.KEYWORD, false));
+		assertCollatesToSame(tsUpper, tsLower);
+	  }
+
+	  /*
+	   * For German, you might want oe to sort and match with o umlaut.
+	   * This is not the default, but you can make a customized ruleset to do this.
+	   *
+	   * The default is DIN 5007-1; this shows how to tailor a collator to get DIN 5007-2 behavior.
+	   *  http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4423383
+	   */
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testCustomRules() throws Exception
+	  public virtual void testCustomRules()
+	  {
+		RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new Locale("de", "DE"));
+
+		string DIN5007_2_tailorings = "& ae , a\u0308 & AE , A\u0308" + "& oe , o\u0308 & OE , O\u0308" + "& ue , u\u0308 & UE , u\u0308";
+
+		RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.Rules + DIN5007_2_tailorings);
+		string tailoredRules = tailoredCollator.Rules;
+		//
+		// at this point, you would save these tailoredRules to a file, 
+		// and use the custom parameter.
+		//
+		string germanUmlaut = "Töne";
+		string germanOE = "Toene";
+		IDictionary<string, string> args = new Dictionary<string, string>();
+		args["custom"] = "rules.txt";
+		args["strength"] = "primary";
+		CollationKeyFilterFactory factory = new CollationKeyFilterFactory(args);
+		factory.inform(new StringMockResourceLoader(tailoredRules));
+		TokenStream tsUmlaut = factory.create(new MockTokenizer(new StringReader(germanUmlaut), MockTokenizer.KEYWORD, false));
+		TokenStream tsOE = factory.create(new MockTokenizer(new StringReader(germanOE), MockTokenizer.KEYWORD, false));
+
+		assertCollatesToSame(tsUmlaut, tsOE);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void assertCollatesToSame(org.apache.lucene.analysis.TokenStream stream1, org.apache.lucene.analysis.TokenStream stream2) throws java.io.IOException
+	  private void assertCollatesToSame(TokenStream stream1, TokenStream stream2)
+	  {
+		stream1.reset();
+		stream2.reset();
+		CharTermAttribute term1 = stream1.addAttribute(typeof(CharTermAttribute));
+		CharTermAttribute term2 = stream2.addAttribute(typeof(CharTermAttribute));
+		assertTrue(stream1.incrementToken());
+		assertTrue(stream2.incrementToken());
+		assertEquals(term1.ToString(), term2.ToString());
+		assertFalse(stream1.incrementToken());
+		assertFalse(stream2.incrementToken());
+		stream1.end();
+		stream2.end();
+		stream1.close();
+		stream2.close();
+	  }
+	}
+
+}
\ No newline at end of file

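testBasicUsage above leans on a primary-strength Turkish collator so that dotted and dotless I forms match. For illustration only, the same "Turkish I" behaviour can be seen with plain culture-aware comparison (the test itself goes through CollationKeyFilterFactory, so this is just a sketch of the underlying idea):

	using System;
	using System.Globalization;

	internal static class TurkishCasingSketch
	{
		internal static void Main()
		{
			CompareInfo tr = CultureInfo.GetCultureInfo("tr-TR").CompareInfo;
			string turkishUpperCase = "I WİLL USE TURKİSH CASING";
			string turkishLowerCase = "ı will use turkish casıng";

			// Under Turkish rules a case-insensitive comparison pairs I with ı and İ with i,
			// so the two strings are expected to compare equal (output: 0).
			Console.WriteLine(tr.Compare(turkishUpperCase, turkishLowerCase, CompareOptions.IgnoreCase));
		}
	}
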
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/DateTimeHelperClass.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/DateTimeHelperClass.cs b/src/Lucene.Net.Tests.Analysis.Common/DateTimeHelperClass.cs
new file mode 100644
index 0000000..0bd49c4
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/DateTimeHelperClass.cs
@@ -0,0 +1,15 @@
+//---------------------------------------------------------------------------------------------------------
+//	Copyright © 2007 - 2015 Tangible Software Solutions Inc.
+//	This class can be used by anyone provided that the copyright notice remains intact.
+//
+//	This class is used to replace calls to Java's System.currentTimeMillis with the C# equivalent.
+//	Unix time is defined as the number of seconds that have elapsed since midnight UTC, 1 January 1970.
+//---------------------------------------------------------------------------------------------------------
+internal static class DateTimeHelperClass
+{
+	private static readonly System.DateTime Jan1st1970 = new System.DateTime(1970, 1, 1, 0, 0, 0, System.DateTimeKind.Utc);
+	internal static long CurrentUnixTimeMillis()
+	{
+		return (long)(System.DateTime.UtcNow - Jan1st1970).TotalMilliseconds;
+	}
+}
\ No newline at end of file

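A short usage sketch for the helper above, which stands in for Java's System.currentTimeMillis() in the converted tests (hypothetical call site, not part of this commit):

	internal static class DateTimeHelperUsageSketch
	{
		internal static void Main()
		{
			// Take a millisecond timestamp before and after a timed block.
			long startMs = DateTimeHelperClass.CurrentUnixTimeMillis();
			System.Threading.Thread.Sleep(50);   // placeholder for the work being timed
			long elapsedMs = DateTimeHelperClass.CurrentUnixTimeMillis() - startMs;
			System.Console.WriteLine("took " + elapsedMs + " ms");
		}
	}
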
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/HashMapHelperClass.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/HashMapHelperClass.cs b/src/Lucene.Net.Tests.Analysis.Common/HashMapHelperClass.cs
new file mode 100644
index 0000000..0c925be
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/HashMapHelperClass.cs
@@ -0,0 +1,26 @@
+//---------------------------------------------------------------------------------------------------------
+//	Copyright © 2007 - 2015 Tangible Software Solutions Inc.
+//	This class can be used by anyone provided that the copyright notice remains intact.
+//
+//	This class is used to replace calls to some Java HashMap or Hashtable methods.
+//---------------------------------------------------------------------------------------------------------
+using System.Collections.Generic;
+internal static class HashMapHelperClass
+{
+	internal static HashSet<KeyValuePair<TKey, TValue>> SetOfKeyValuePairs<TKey, TValue>(this IDictionary<TKey, TValue> dictionary)
+	{
+		HashSet<KeyValuePair<TKey, TValue>> entries = new HashSet<KeyValuePair<TKey, TValue>>();
+		foreach (KeyValuePair<TKey, TValue> keyValuePair in dictionary)
+		{
+			entries.Add(keyValuePair);
+		}
+		return entries;
+	}
+
+	internal static TValue GetValueOrNull<TKey, TValue>(this IDictionary<TKey, TValue> dictionary, TKey key)
+	{
+		TValue ret;
+		dictionary.TryGetValue(key, out ret);
+		return ret;
+	}
+}
\ No newline at end of file

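For reference, a small usage sketch of the two extension methods above (hypothetical call sites, not part of this commit):

	using System.Collections.Generic;

	internal static class HashMapHelperUsageSketch
	{
		internal static void Main()
		{
			var map = new Dictionary<string, int> { ["a"] = 1, ["b"] = 2 };

			// entrySet() stand-in: a set of the dictionary's key/value pairs.
			HashSet<KeyValuePair<string, int>> entries = map.SetOfKeyValuePairs();

			// HashMap.get() stand-in: returns default(TValue) when the key is absent,
			// which is 0 here because int is a value type (not null).
			int present = map.GetValueOrNull("a");   // 1
			int missing = map.GetValueOrNull("c");   // 0
			System.Console.WriteLine(entries.Count + " " + present + " " + missing);
		}
	}
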
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/StringHelperClass.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/StringHelperClass.cs b/src/Lucene.Net.Tests.Analysis.Common/StringHelperClass.cs
new file mode 100644
index 0000000..3bcece0
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/StringHelperClass.cs
@@ -0,0 +1,90 @@
+//-------------------------------------------------------------------------------------------
+//	Copyright © 2007 - 2015 Tangible Software Solutions Inc.
+//	This class can be used by anyone provided that the copyright notice remains intact.
+//
+//	This class is used to convert some aspects of the Java String class.
+//-------------------------------------------------------------------------------------------
+internal static class StringHelperClass
+{
+	//----------------------------------------------------------------------------------
+	//	This method replaces the Java String.substring method when 'start' is a
+	//	method call or calculated value to ensure that 'start' is obtained just once.
+	//----------------------------------------------------------------------------------
+	internal static string SubstringSpecial(this string self, int start, int end)
+	{
+		return self.Substring(start, end - start);
+	}
+
+	//------------------------------------------------------------------------------------
+	//	This method is used to replace calls to the 2-arg Java String.startsWith method.
+	//------------------------------------------------------------------------------------
+	internal static bool StartsWith(this string self, string prefix, int toffset)
+	{
+		return self.IndexOf(prefix, toffset, System.StringComparison.Ordinal) == toffset;
+	}
+
+	//------------------------------------------------------------------------------
+	//	This method is used to replace most calls to the Java String.split method.
+	//------------------------------------------------------------------------------
+	internal static string[] Split(this string self, string regexDelimiter, bool trimTrailingEmptyStrings)
+	{
+		string[] splitArray = System.Text.RegularExpressions.Regex.Split(self, regexDelimiter);
+
+		if (trimTrailingEmptyStrings)
+		{
+			if (splitArray.Length > 1)
+			{
+				for (int i = splitArray.Length; i > 0; i--)
+				{
+					if (splitArray[i - 1].Length > 0)
+					{
+						if (i < splitArray.Length)
+							System.Array.Resize(ref splitArray, i);
+
+						break;
+					}
+				}
+			}
+		}
+
+		return splitArray;
+	}
+
+	//-----------------------------------------------------------------------------
+	//	These methods are used to replace calls to some Java String constructors.
+	//-----------------------------------------------------------------------------
+	internal static string NewString(sbyte[] bytes)
+	{
+		return NewString(bytes, 0, bytes.Length);
+	}
+	internal static string NewString(sbyte[] bytes, int index, int count)
+	{
+		return System.Text.Encoding.UTF8.GetString((byte[])(object)bytes, index, count);
+	}
+	internal static string NewString(sbyte[] bytes, string encoding)
+	{
+		return NewString(bytes, 0, bytes.Length, encoding);
+	}
+	internal static string NewString(sbyte[] bytes, int index, int count, string encoding)
+	{
+		return System.Text.Encoding.GetEncoding(encoding).GetString((byte[])(object)bytes, index, count);
+	}
+
+	//--------------------------------------------------------------------------------
+	//	These methods are used to replace calls to the Java String.getBytes methods.
+	//--------------------------------------------------------------------------------
+	internal static sbyte[] GetBytes(this string self)
+	{
+		return GetSBytesForEncoding(System.Text.Encoding.UTF8, self);
+	}
+	internal static sbyte[] GetBytes(this string self, string encoding)
+	{
+		return GetSBytesForEncoding(System.Text.Encoding.GetEncoding(encoding), self);
+	}
+	private static sbyte[] GetSBytesForEncoding(System.Text.Encoding encoding, string s)
+	{
+		sbyte[] sbytes = new sbyte[encoding.GetByteCount(s)];
+		encoding.GetBytes(s, 0, s.Length, (byte[])(object)sbytes, 0);
+		return sbytes;
+	}
+}
\ No newline at end of file

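And a usage sketch for the String helpers above, showing the intended Java-to-C# mappings (hypothetical call sites, not part of this commit):

	internal static class StringHelperUsageSketch
	{
		internal static void Main()
		{
			// Two-argument String.startsWith: does "foobar" have "bar" at offset 3?
			bool starts = "foobar".StartsWith("bar", 3);                 // true

			// String.split with trailing empty strings trimmed, as Java does by default.
			string[] parts = "a,b,,".Split(",", true);                   // ["a", "b"]

			// String.getBytes(encoding) / new String(byte[], encoding) round trip.
			sbyte[] raw = "héllo".GetBytes("UTF-8");
			string round = StringHelperClass.NewString(raw, "UTF-8");    // "héllo"

			System.Console.WriteLine(starts + " " + parts.Length + " " + round);
		}
	}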
