Return-Path:
X-Original-To: apmail-lucenenet-commits-archive@www.apache.org
Delivered-To: apmail-lucenenet-commits-archive@www.apache.org
Received: from mail.apache.org (hermes.apache.org [140.211.11.3])
by minotaur.apache.org (Postfix) with SMTP id 950CA184AE
for ;
Thu, 10 Dec 2015 18:38:52 +0000 (UTC)
Received: (qmail 31894 invoked by uid 500); 10 Dec 2015 18:38:52 -0000
Delivered-To: apmail-lucenenet-commits-archive@lucenenet.apache.org
Received: (qmail 31805 invoked by uid 500); 10 Dec 2015 18:38:52 -0000
Mailing-List: contact commits-help@lucenenet.apache.org; run by ezmlm
Precedence: bulk
List-Help:
List-Unsubscribe:
List-Post:
List-Id:
Reply-To: lucene-net-dev@lucenenet.apache.org
Delivered-To: mailing list commits@lucenenet.apache.org
Received: (qmail 31778 invoked by uid 99); 10 Dec 2015 18:38:52 -0000
Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org)
(140.211.11.23)
by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 10 Dec 2015 18:38:52 +0000
Received: by git1-us-west.apache.org (ASF Mail Server at
git1-us-west.apache.org, from userid 33)
id 4F5C8E1790; Thu, 10 Dec 2015 18:38:52 +0000 (UTC)
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
From: synhershko@apache.org
To: commits@lucenenet.apache.org
Date: Thu, 10 Dec 2015 18:39:11 -0000
Message-Id: <44c8a0c20c1a4f7bb1e300a0a18a5a5e@git.apache.org>
In-Reply-To: <82040c0d687b4700bc749a953375230f@git.apache.org>
References: <82040c0d687b4700bc749a953375230f@git.apache.org>
X-Mailer: ASF-Git Admin Mailer
Subject: [22/27] lucenenet git commit: adding converted analysis common tests
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
new file mode 100644
index 0000000..73ccad8
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
@@ -0,0 +1,383 @@
+using System;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.core
+{
+
+
+ using MappingCharFilter = org.apache.lucene.analysis.charfilter.MappingCharFilter;
+ using NormalizeCharMap = org.apache.lucene.analysis.charfilter.NormalizeCharMap;
+ using CommonGramsFilter = org.apache.lucene.analysis.commongrams.CommonGramsFilter;
+ using WordDelimiterFilter = org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
+ using EdgeNGramTokenizer = org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
+ using NGramTokenFilter = org.apache.lucene.analysis.ngram.NGramTokenFilter;
+ using ShingleFilter = org.apache.lucene.analysis.shingle.ShingleFilter;
+ using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+ using WikipediaTokenizer = org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
+ using SuppressCodecs = org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @SuppressCodecs("Direct") public class TestBugInSomething extends org.apache.lucene.analysis.BaseTokenStreamTestCase
+ public class TestBugInSomething : BaseTokenStreamTestCase
+ {
+        /// <summary>
+        /// Runs checkAnalysisConsistency over one fixed input string, using an analyzer built
+        /// from a three-word CharArraySet and a three-entry NormalizeCharMap.
+        /// </summary>
+        public virtual void test()
+        {
+            // Word set handed to the CommonGramsFilter created by the analyzer below.
+            CharArraySet commonWords = new CharArraySet(TEST_VERSION_CURRENT, 3, false);
+            foreach (string word in new[] {"jjp", "wlmwoknt", "tcgyreo"})
+            {
+                commonWords.add(word);
+            }
+
+            // Mappings handed to the MappingCharFilter created by the analyzer below.
+            NormalizeCharMap.Builder mappings = new NormalizeCharMap.Builder();
+            mappings.add("mtqlpi", "");
+            mappings.add("mwoknt", "jjp");
+            mappings.add("tcgyreo", "zpfpajyws");
+            NormalizeCharMap charMap = mappings.build();
+
+            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, commonWords, charMap);
+            checkAnalysisConsistency(random(), analyzer, false, "wmgddzunizdomqyj");
+        }
+
+        /// <summary>
+        /// Analyzer used by <c>test()</c>: a MockTokenizer followed by a CommonGramsFilter, with
+        /// the reader wrapped in a MockCharFilter and then a MappingCharFilter.
+        /// </summary>
+        private class AnalyzerAnonymousInnerClassHelper : Analyzer
+        {
+            // Carried over from the converted anonymous class; no member of this class reads it.
+            private readonly TestBugInSomething outerInstance;
+
+            private CharArraySet cas;
+            private NormalizeCharMap map;
+
+            public AnalyzerAnonymousInnerClassHelper(TestBugInSomething outerInstance, CharArraySet cas, NormalizeCharMap map)
+            {
+                this.map = map;
+                this.cas = cas;
+                this.outerInstance = outerInstance;
+            }
+
+            protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+            {
+                var guardedReader = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(reader);
+                Tokenizer tokenizer = new MockTokenizer(guardedReader, MockTokenFilter.ENGLISH_STOPSET, false, -65);
+                TokenFilter commonGrams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, cas);
+                return new TokenStreamComponents(tokenizer, commonGrams);
+            }
+
+            protected internal override Reader initReader(string fieldName, Reader reader)
+            {
+                // The MockCharFilter is the inner wrapper; the char mapping is layered on top of it.
+                return new MappingCharFilter(map, new MockCharFilter(reader, 0));
+            }
+        }
+
+        // Stream consumed by testWrapping(); every overridden member of its class throws.
+        internal CharFilter wrappedStream = new CharFilterAnonymousInnerClassHelper(new StringReader("bogus"));
+
+        /// <summary>
+        /// CharFilter stub in which every overridden member throws NotSupportedException, using
+        /// the member's Java-style signature as the exception message.
+        /// </summary>
+        private class CharFilterAnonymousInnerClassHelper : CharFilter
+        {
+            /// <param name="java">Reader forwarded to the CharFilter base constructor.</param>
+            public CharFilterAnonymousInnerClassHelper(StringReader java) : base(java)
+            {
+                // Fix: the converted code passed the type name StringReader to base(...),
+                // which is not an expression; the constructor argument is what must be forwarded.
+            }
+
+            public override void mark(int readAheadLimit)
+            {
+                throw new System.NotSupportedException("mark(int)");
+            }
+
+            public override bool markSupported()
+            {
+                throw new System.NotSupportedException("markSupported()");
+            }
+
+            public override int read()
+            {
+                throw new System.NotSupportedException("read()");
+            }
+
+            public override int read(char[] cbuf)
+            {
+                throw new System.NotSupportedException("read(char[])");
+            }
+
+            public override int read(CharBuffer target)
+            {
+                throw new System.NotSupportedException("read(CharBuffer)");
+            }
+
+            public override bool ready()
+            {
+                throw new System.NotSupportedException("ready()");
+            }
+
+            public override void reset()
+            {
+                throw new System.NotSupportedException("reset()");
+            }
+
+            public override long skip(long n)
+            {
+                throw new System.NotSupportedException("skip(long)");
+            }
+
+            public override int correct(int currentOff)
+            {
+                throw new System.NotSupportedException("correct(int)");
+            }
+
+            public override void close()
+            {
+                throw new System.NotSupportedException("close()");
+            }
+
+            public override int read(char[] arg0, int arg1, int arg2)
+            {
+                throw new System.NotSupportedException("read(char[], int, int)");
+            }
+        }
+
+        /// <summary>
+        /// Calls each member of the wrapper built around <c>wrappedStream</c> and expects every
+        /// call to end in an exception whose message is the signature string listed next to it.
+        /// </summary>
+        public virtual void testWrapping()
+        {
+            CharFilter cs = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(wrappedStream);
+
+            expectFailureMessage(() => cs.mark(1), "mark(int)");
+            expectFailureMessage(() => cs.markSupported(), "markSupported()");
+            expectFailureMessage(() => cs.read(), "read()");
+            expectFailureMessage(() => cs.read(new char[0]), "read(char[])");
+            expectFailureMessage(() => cs.read(CharBuffer.wrap(new char[0])), "read(CharBuffer)");
+            expectFailureMessage(() => cs.reset(), "reset()");
+            expectFailureMessage(() => cs.skip(1), "skip(long)");
+            expectFailureMessage(() => cs.correctOffset(1), "correct(int)");
+            expectFailureMessage(() => cs.close(), "close()");
+            expectFailureMessage(() => cs.read(new char[0], 0, 0), "read(char[], int, int)");
+        }
+
+        // Runs the call, fails if it returns normally, and compares the message of whatever
+        // exception is caught (including one raised by fail()) with the expected text.
+        private void expectFailureMessage(Action call, string expectedMessage)
+        {
+            try
+            {
+                call();
+                fail();
+            }
+            catch (Exception e)
+            {
+                assertEquals(expectedMessage, e.Message);
+            }
+        }
+
+ // todo: test framework?
+
+        /// <summary>
+        /// Pass-through filter that writes a console line for every token and for every
+        /// end()/close()/reset() call, each prefixed with the runtime type name of its input.
+        /// </summary>
+        internal sealed class SopTokenFilter : TokenFilter
+        {
+            internal SopTokenFilter(TokenStream input) : base(input)
+            {
+            }
+
+            /// <summary>Advances the input; on success prints the attribute state and returns true.</summary>
+            public override bool incrementToken()
+            {
+                if (!input.incrementToken())
+                {
+                    return false;
+                }
+
+                string producer = input.GetType().Name;
+                Console.WriteLine(producer + "->" + this.reflectAsString(false));
+                return true;
+            }
+
+            /// <summary>Delegates to the base implementation, then prints "&lt;input type&gt;.end()".</summary>
+            public override void end()
+            {
+                base.end();
+                string producer = input.GetType().Name;
+                Console.WriteLine(producer + ".end()");
+            }
+
+            /// <summary>Delegates to the base implementation, then prints "&lt;input type&gt;.close()".</summary>
+            public override void close()
+            {
+                base.close();
+                string producer = input.GetType().Name;
+                Console.WriteLine(producer + ".close()");
+            }
+
+            /// <summary>Delegates to the base implementation, then prints "&lt;input type&gt;.reset()".</summary>
+            public override void reset()
+            {
+                base.reset();
+                string producer = input.GetType().Name;
+                Console.WriteLine(producer + ".reset()");
+            }
+        }
+
+        // LUCENE-5269
+        /// <summary>
+        /// Feeds random data (2000 iterations) through an EdgeNGramTokenizer / ShingleFilter /
+        /// NGramTokenFilter chain via checkRandomData.
+        /// </summary>
+        public virtual void testUnicodeShinglesAndNgrams()
+        {
+            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this);
+            checkRandomData(random(), analyzer, 2000);
+        }
+
+        /// <summary>
+        /// Analyzer used by <c>testUnicodeShinglesAndNgrams()</c>.
+        /// </summary>
+        /// <remarks>
+        /// Fix: this class was declared as AnalyzerAnonymousInnerClassHelper, the same name as
+        /// another nested class of TestBugInSomething; a type cannot contain two members with
+        /// the same name, so it is renamed here together with its only call site.
+        /// </remarks>
+        private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+        {
+            // Carried over from the converted anonymous class; no member of this class reads it.
+            private readonly TestBugInSomething outerInstance;
+
+            public AnalyzerAnonymousInnerClassHelper3(TestBugInSomething outerInstance)
+            {
+                this.outerInstance = outerInstance;
+            }
+
+            protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+            {
+                Tokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, reader, 2, 94);
+                //TokenStream stream = new SopTokenFilter(tokenizer);
+                TokenStream stream = new ShingleFilter(tokenizer, 5);
+                //stream = new SopTokenFilter(stream);
+                stream = new NGramTokenFilter(TEST_VERSION_CURRENT, stream, 55, 83);
+                //stream = new SopTokenFilter(stream);
+                return new TokenStreamComponents(tokenizer, stream);
+            }
+        }
+
+        /// <summary>
+        /// Runs checkAnalysisConsistency on one fixed string through a WikipediaTokenizer /
+        /// WordDelimiterFilter chain configured with a fixed protected-word set and type table.
+        /// </summary>
+        public virtual void testCuriousWikipediaString()
+        {
+            // Fix: "new HashSet<>(Arrays.asList(...))" is Java syntax; C# needs an explicit type
+            // argument and has no Arrays.asList, so a collection initializer is used instead.
+            CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<string> {"rrdpafa", "pupmmlu", "xlq", "dyy", "zqrxrrck", "o", "hsrlfvcha"}, false);
+            sbyte[] table = new sbyte[] {-57, 26, 1, 48, 63, -23, 55, -84, 18, 120, -97, 103, 58, 13, 84, 89, 57, -13, -63, 5, 28, 97, -54, -94, 102, -108, -5, 5, 46, 40, 43, 78, 43, -72, 36, 29, 124, -106, -22, -51, 65, 5, 31, -42, 6, -99, 97, 14, 81, -128, 74, 100, 54, -55, -25, 53, -71, -98, 44, 33, 86, 106, -42, 47, 115, -89, -18, -26, 22, -95, -43, 83, -125, 105, -104, -24, 106, -16, 126, 115, -105, 97, 65, -33, 57, 44, -1, 123, -68, 100, 13, -41, -64, -119, 0, 92, 94, -36, 53, -9, -102, -18, 90, 94, -26, 31, 71, -20};
+            Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this, protWords, table);
+            // Fix: the literal was split over two lines, which a regular C# string cannot span.
+            // NOTE(review): the archived copy appears to have dropped a "</p>" fragment at the
+            // break; it is restored here - confirm against the upstream Java test.
+            checkAnalysisConsistency(random(), a, false, "B\u28c3\ue0f8[ \ud800\udfc2 </p> jb");
+        }
+
+        /// <summary>
+        /// Analyzer used by <c>testCuriousWikipediaString()</c>: a WikipediaTokenizer feeding a
+        /// WordDelimiterFilter, with a SopTokenFilter placed before and after the filter.
+        /// </summary>
+        private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+        {
+            // Carried over from the converted anonymous class; no member of this class reads it.
+            private readonly TestBugInSomething outerInstance;
+
+            private CharArraySet protWords;
+            private sbyte[] table;
+
+            public AnalyzerAnonymousInnerClassHelper2(TestBugInSomething outerInstance, CharArraySet protWords, sbyte[] table)
+            {
+                this.table = table;
+                this.protWords = protWords;
+                this.outerInstance = outerInstance;
+            }
+
+            protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+            {
+                Tokenizer source = new WikipediaTokenizer(reader);
+                TokenStream beforeDelimiter = new SopTokenFilter(source);
+                TokenStream delimited = new WordDelimiterFilter(TEST_VERSION_CURRENT, beforeDelimiter, table, -50, protWords);
+                TokenStream afterDelimiter = new SopTokenFilter(delimited);
+                return new TokenStreamComponents(source, afterDelimiter);
+            }
+        }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestClassicAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestClassicAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestClassicAnalyzer.cs
new file mode 100644
index 0000000..9b3f425
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestClassicAnalyzer.cs
@@ -0,0 +1,395 @@
+using System;
+
+namespace org.apache.lucene.analysis.core
+{
+
+ using ClassicAnalyzer = org.apache.lucene.analysis.standard.ClassicAnalyzer;
+ using Document = org.apache.lucene.document.Document;
+ using Field = org.apache.lucene.document.Field;
+ using TextField = org.apache.lucene.document.TextField;
+ using DocsAndPositionsEnum = org.apache.lucene.index.DocsAndPositionsEnum;
+ using DocsEnum = org.apache.lucene.index.DocsEnum;
+ using IndexReader = org.apache.lucene.index.IndexReader;
+ using IndexWriter = org.apache.lucene.index.IndexWriter;
+ using IndexWriterConfig = org.apache.lucene.index.IndexWriterConfig;
+ using MultiFields = org.apache.lucene.index.MultiFields;
+ using Term = org.apache.lucene.index.Term;
+ using DocIdSetIterator = org.apache.lucene.search.DocIdSetIterator;
+ using RAMDirectory = org.apache.lucene.store.RAMDirectory;
+ using BytesRef = org.apache.lucene.util.BytesRef;
+ using Version = org.apache.lucene.util.Version;
+
+
+
+ ///
+ /// Copyright 2004 The Apache Software Foundation
+ ///
+ /// Licensed under the Apache License, Version 2.0 (the "License");
+ /// you may not use this file except in compliance with the License.
+ /// You may obtain a copy of the License at
+ ///
+ /// http://www.apache.org/licenses/LICENSE-2.0
+ ///
+ /// Unless required by applicable law or agreed to in writing, software
+ /// distributed under the License is distributed on an "AS IS" BASIS,
+ /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ /// See the License for the specific language governing permissions and
+ /// limitations under the License.
+ ///
+
+ public class TestClassicAnalyzer : BaseTokenStreamTestCase
+ {
+
+ // Analyzer instance shared by the tests in this class that pass `a` to assertAnalyzesTo.
+ private Analyzer a = new ClassicAnalyzer(TEST_VERSION_CURRENT);
+
+        /// <summary>
+        /// With MaxTokenLength set to 5, "toolong" is expected to be absent from the output.
+        /// </summary>
+        public virtual void testMaxTermLength()
+        {
+            ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT) { MaxTokenLength = 5 };
+            string[] expectedTerms = {"ab", "cd", "xy", "z"};
+            assertAnalyzesTo(analyzer, "ab cd toolong xy z", expectedTerms);
+        }
+
+        /// <summary>
+        /// Analyzes the same text before and after lowering MaxTokenLength to 5 on one analyzer
+        /// instance; the second check also passes an int array (presumably the expected
+        /// position increments - confirm against assertAnalyzesTo).
+        /// </summary>
+        public virtual void testMaxTermLength2()
+        {
+            const string text = "ab cd toolong xy z";
+            ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);
+
+            assertAnalyzesTo(analyzer, text, new[] {"ab", "cd", "toolong", "xy", "z"});
+
+            analyzer.MaxTokenLength = 5;
+            assertAnalyzesTo(analyzer, text, new[] {"ab", "cd", "xy", "z"}, new[] {1, 1, 2, 1});
+        }
+
+        /// <summary>
+        /// A term of 255 'a' characters is expected in the output; the same term with one more
+        /// 'a' appended is expected to be dropped.
+        /// </summary>
+        public virtual void testMaxTermLength3()
+        {
+            string longTerm = new string('a', 255);
+
+            assertAnalyzesTo(a, "ab cd " + longTerm + " xy z", new[] {"ab", "cd", longTerm, "xy", "z"});
+            assertAnalyzesTo(a, "ab cd " + longTerm + "a xy z", new[] {"ab", "cd", "xy", "z"});
+        }
+
+        /// <summary>
+        /// Mixed letter/digit tokens are expected to come out as single lower-cased terms.
+        /// </summary>
+        public virtual void testAlphanumeric()
+        {
+            assertAnalyzesTo(a, "B2B", new[] {"b2b"});
+            assertAnalyzesTo(a, "2B", new[] {"2b"});
+        }
+
+        /// <summary>
+        /// Underscores act as delimiters here (e-mail addresses are covered by a separate test);
+        /// the second input additionally expects "with" and "and" to be missing from the output.
+        /// </summary>
+        public virtual void testUnderscores()
+        {
+            assertAnalyzesTo(a, "word_having_underscore", new[] {"word", "having", "underscore"});
+            assertAnalyzesTo(a, "word_with_underscore_and_stopwords", new[] {"word", "underscore", "stopwords"});
+        }
+
+        /// <summary>
+        /// The delimiters "-", "," and "/" are each expected to split the input into terms.
+        /// </summary>
+        public virtual void testDelimiters()
+        {
+            assertAnalyzesTo(a, "some-dashed-phrase", new[] {"some", "dashed", "phrase"});
+            assertAnalyzesTo(a, "dogs,chase,cats", new[] {"dogs", "chase", "cats"});
+            assertAnalyzesTo(a, "ac/dc", new[] {"ac", "dc"});
+        }
+
+        /// <summary>
+        /// Internal apostrophes (O'Reilly, you're, don't) are expected to be kept, while a
+        /// trailing possessive 's is expected to be removed.
+        /// </summary>
+        public virtual void testApostrophes()
+        {
+            // Per the original test comment, possessives are removed by StandardFilter rather
+            // than by the tokenizer.
+            assertAnalyzesTo(a, "O'Reilly", new[] {"o'reilly"});
+            assertAnalyzesTo(a, "you're", new[] {"you're"});
+            assertAnalyzesTo(a, "she's", new[] {"she"});
+            assertAnalyzesTo(a, "Jim's", new[] {"jim"});
+            assertAnalyzesTo(a, "don't", new[] {"don't"});
+            assertAnalyzesTo(a, "O'Reilly's", new[] {"o'reilly"});
+        }
+
+        /// <summary>
+        /// "s" and "t" are expected to survive as terms when split off by a dash, while "a" is
+        /// expected to be removed.
+        /// </summary>
+        public virtual void testTSADash()
+        {
+            // Per the original test comment: t and s were stop words in Lucene <= 2.0, which made
+            // these terms impossible to search for correctly.
+            assertAnalyzesTo(a, "s-class", new[] {"s", "class"});
+            assertAnalyzesTo(a, "t-com", new[] {"t", "com"});
+
+            // 'a' is still a stop word.
+            assertAnalyzesTo(a, "a-class", new[] {"class"});
+        }
+
+        /// <summary>
+        /// Company names joined by "&amp;" or "@" are expected to stay single lower-cased terms.
+        /// </summary>
+        public virtual void testCompanyNames()
+        {
+            assertAnalyzesTo(a, "AT&T", new[] {"at&t"});
+            assertAnalyzesTo(a, "Excite@Home", new[] {"excite@home"});
+        }
+
+        /// <summary>
+        /// Analyzing a host name with a trailing dot must not raise a NullReferenceException;
+        /// the test is named after LUCENE-1140.
+        /// </summary>
+        public virtual void testLucene1140()
+        {
+            try
+            {
+                ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);
+                // Fix: the expected token type had been reduced to "" in this copy (angle-bracket
+                // text was stripped). NOTE(review): "<HOST>" is restored from the ClassicTokenizer
+                // token type names - confirm against the upstream Java test.
+                assertAnalyzesTo(analyzer, "www.nutch.org.", new string[]{"www.nutch.org"}, new string[] {"<HOST>"});
+            }
+            catch (System.NullReferenceException)
+            {
+                fail("Should not throw an NPE and it did");
+            }
+
+        }
+
+        /// <summary>
+        /// Domain names, with and without a trailing dot, are expected to come out as a single
+        /// term; the trailing-dot cases also check the token type.
+        /// </summary>
+        /// <remarks>
+        /// Fix: the expected token types had been reduced to "" in this copy (angle-bracket text
+        /// was stripped). NOTE(review): "&lt;HOST&gt;" and "&lt;ACRONYM&gt;" are restored from the
+        /// ClassicTokenizer token type names - confirm against the upstream Java test.
+        /// </remarks>
+        public virtual void testDomainNames()
+        {
+            // Current lucene should not show the bug
+            ClassicAnalyzer a2 = new ClassicAnalyzer(TEST_VERSION_CURRENT);
+
+            // domain names
+            assertAnalyzesTo(a2, "www.nutch.org", new string[]{"www.nutch.org"});
+            // Notice the trailing dot. See https://issues.apache.org/jira/browse/LUCENE-1068.
+            // The following should be recognized as HOST:
+            assertAnalyzesTo(a2, "www.nutch.org.", new string[]{"www.nutch.org"}, new string[] {"<HOST>"});
+
+            // 2.3 should show the bug. But, alas, it's obsolete, we don't support it.
+            // a2 = new ClassicAnalyzer(org.apache.lucene.util.Version.LUCENE_23);
+            // assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "wwwnutchorg" }, new String[] { "<ACRONYM>" });
+
+            // 2.4 should not show the bug. But, alas, it's also obsolete,
+            // so we check latest released (Robert's gonna break this on 4.0 soon :) )
+            a2 = new ClassicAnalyzer(Version.LUCENE_31);
+            assertAnalyzesTo(a2, "www.nutch.org.", new string[]{"www.nutch.org"}, new string[] {"<HOST>"});
+        }
+
+        /// <summary>
+        /// E-mail addresses, including ones with a period or underscore in the local part, are
+        /// expected to stay single terms.
+        /// </summary>
+        public virtual void testEMailAddresses()
+        {
+            assertAnalyzesTo(a, "test@example.com", new[] {"test@example.com"});
+            assertAnalyzesTo(a, "first.lastname@example.com", new[] {"first.lastname@example.com"});
+            assertAnalyzesTo(a, "first_lastname@example.com", new[] {"first_lastname@example.com"});
+        }
+
+        /// <summary>
+        /// Floating point values, serial/model numbers and IP addresses are expected to stay
+        /// single terms.
+        /// </summary>
+        public virtual void testNumeric()
+        {
+            // Per the original test comment: every other segment must have at least one digit.
+            assertAnalyzesTo(a, "21.35", new[] {"21.35"});
+            assertAnalyzesTo(a, "R2D2 C3PO", new[] {"r2d2", "c3po"});
+            assertAnalyzesTo(a, "216.239.63.104", new[] {"216.239.63.104"});
+            assertAnalyzesTo(a, "1-2-3", new[] {"1-2-3"});
+            assertAnalyzesTo(a, "a1-b2-c3", new[] {"a1-b2-c3"});
+            assertAnalyzesTo(a, "a1-b-c3", new[] {"a1-b-c3"});
+        }
+
+        /// <summary>
+        /// A number embedded in ordinary text is expected to be kept as its own term.
+        /// </summary>
+        public virtual void testTextWithNumbers()
+        {
+            string[] expectedTerms = {"david", "has", "5000", "bones"};
+            assertAnalyzesTo(a, "David has 5000 bones", expectedTerms);
+        }
+
+        /// <summary>
+        /// Assorted inputs: a single-letter word, mixed case, stray punctuation and quoted text.
+        /// </summary>
+        public virtual void testVariousText()
+        {
+            assertAnalyzesTo(a, "C embedded developers wanted", new[] {"c", "embedded", "developers", "wanted"});
+            assertAnalyzesTo(a, "foo bar FOO BAR", new[] {"foo", "bar", "foo", "bar"});
+            assertAnalyzesTo(a, "foo bar . FOO <> BAR", new[] {"foo", "bar", "foo", "bar"});
+            assertAnalyzesTo(a, "\"QUOTED\" word", new[] {"quoted", "word"});
+        }
+
+        /// <summary>
+        /// Acronyms are expected to have their dots stripped.
+        /// </summary>
+        public virtual void testAcronyms()
+        {
+            string[] expectedTerms = {"usa"};
+            assertAnalyzesTo(a, "U.S.A.", expectedTerms);
+        }
+
+        /// <summary>
+        /// "C++" and "C#" are both expected to reduce to the single term "c".
+        /// </summary>
+        public virtual void testCPlusPlusHash()
+        {
+            // Per the original test comment: it would be nice to change the grammar in
+            // StandardTokenizer.jj to make "C#" and "C++" end up as tokens.
+            string[] expectedTerms = {"c"};
+            assertAnalyzesTo(a, "C++", expectedTerms);
+            assertAnalyzesTo(a, "C#", new[] {"c"});
+        }
+
+        /// <summary>
+        /// Two space-separated Korean words are expected to come out as two unchanged terms.
+        /// </summary>
+        public virtual void testKorean()
+        {
+            string[] expectedTerms = {"안녕하세요", "한글입니다"};
+            assertAnalyzesTo(a, "안녕하세요 한글입니다", expectedTerms);
+        }
+
+ // Compliance with the "old" JavaCC-based analyzer, see:
+ // https://issues.apache.org/jira/browse/LUCENE-966#action_12516752
+
+        /// <summary>
+        /// A file name such as "2004.jpg" is expected to be one token, with its type checked.
+        /// </summary>
+        public virtual void testComplianceFileName()
+        {
+            // Fix: the expected type had been reduced to "" in this copy (angle-bracket text was
+            // stripped). NOTE(review): "<HOST>" is restored from the ClassicTokenizer grammar
+            // (alphanumeric segments joined by dots) - confirm against the upstream Java test.
+            assertAnalyzesTo(a, "2004.jpg", new string[]{"2004.jpg"}, new string[]{"<HOST>"});
+        }
+
+        /// <summary>
+        /// "62.46" is expected to be one token, with its type checked.
+        /// </summary>
+        public virtual void testComplianceNumericIncorrect()
+        {
+            // Fix: the expected type had been reduced to "" in this copy (angle-bracket text was
+            // stripped). NOTE(review): "<HOST>" is restored on the assumption that the host rule
+            // takes precedence over the number rule for dot-separated digits, which would also
+            // explain the "Incorrect" in the test name - confirm against the upstream Java test.
+            assertAnalyzesTo(a, "62.46", new string[]{"62.46"}, new string[]{"<HOST>"});
+        }
+
+        /// <summary>
+        /// A long dash-separated number is expected to be one token, with its type checked.
+        /// </summary>
+        public virtual void testComplianceNumericLong()
+        {
+            // Fix: the expected type had been reduced to "" in this copy (angle-bracket text was
+            // stripped). NOTE(review): "<NUM>" is restored from the ClassicTokenizer token type
+            // names - confirm against the upstream Java test.
+            assertAnalyzesTo(a, "978-0-94045043-1", new string[]{"978-0-94045043-1"}, new string[]{"<NUM>"});
+        }
+
+        /// <summary>
+        /// A slash-separated path containing digits is expected to be one token, with its type
+        /// checked.
+        /// </summary>
+        public virtual void testComplianceNumericFile()
+        {
+            // Fix: the expected type had been reduced to "" in this copy (angle-bracket text was
+            // stripped). NOTE(review): "<NUM>" is restored from the ClassicTokenizer token type
+            // names - confirm against the upstream Java test.
+            assertAnalyzesTo(a, "78academyawards/rules/rule02.html", new string[]{"78academyawards/rules/rule02.html"}, new string[]{"<NUM>"});
+        }
+
+        /// <summary>
+        /// A dash/underscore-separated identifier containing digits is expected to be one token,
+        /// with its type checked.
+        /// </summary>
+        public virtual void testComplianceNumericWithUnderscores()
+        {
+            // Fix: the expected type had been reduced to "" in this copy (angle-bracket text was
+            // stripped). NOTE(review): "<NUM>" is restored from the ClassicTokenizer token type
+            // names - confirm against the upstream Java test.
+            assertAnalyzesTo(a, "2006-03-11t082958z_01_ban130523_rtridst_0_ozabs", new string[]{"2006-03-11t082958z_01_ban130523_rtridst_0_ozabs"}, new string[]{"<NUM>"});
+        }
+
+        /// <summary>
+        /// "mid-20th" is expected to be one token, with its type checked.
+        /// </summary>
+        public virtual void testComplianceNumericWithDash()
+        {
+            // Fix: the expected type had been reduced to "" in this copy (angle-bracket text was
+            // stripped). NOTE(review): "<NUM>" is restored on the assumption that a dash followed
+            // by a digit-bearing segment matches the number rule - confirm against the upstream
+            // Java test.
+            assertAnalyzesTo(a, "mid-20th", new string[]{"mid-20th"}, new string[]{"<NUM>"});
+        }
+
+        /// <summary>
+        /// A URL-like path followed by a dashed file name is expected to split into ten tokens,
+        /// each with its type checked.
+        /// </summary>
+        public virtual void testComplianceManyTokens()
+        {
+            // Fix: all ten expected types had been reduced to "" in this copy (angle-bracket text
+            // was stripped). NOTE(review): the values below are restored from the ClassicTokenizer
+            // token type names, one per expected term - confirm against the upstream Java test.
+            assertAnalyzesTo(a, "/money.cnn.com/magazines/fortune/fortune_archive/2007/03/19/8402357/index.htm " + "safari-0-sheikh-zayed-grand-mosque.jpg", new string[]{"money.cnn.com", "magazines", "fortune", "fortune", "archive/2007/03/19/8402357", "index.htm", "safari-0-sheikh", "zayed", "grand", "mosque.jpg"}, new string[]{"<HOST>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<NUM>", "<HOST>", "<NUM>", "<ALPHANUM>", "<ALPHANUM>", "<HOST>"});
+        }
+
+        /// <summary>
+        /// With a Version.LUCENE_30 analyzer, U+02C6 between two words is expected to split them.
+        /// </summary>
+        public virtual void testJava14BWCompatibility()
+        {
+            ClassicAnalyzer legacyAnalyzer = new ClassicAnalyzer(Version.LUCENE_30);
+            string[] expectedTerms = {"test", "test"};
+            assertAnalyzesTo(legacyAnalyzer, "test\u02C6test", expectedTerms);
+        }
+
+        /// <summary>
+        /// Make sure we skip wicked long terms: a document containing a term longer than
+        /// IndexWriter.MAX_TERM_LENGTH is still indexed, its other terms are searchable, and a
+        /// term of exactly the maximum length can be indexed and found.
+        /// </summary>
+        public virtual void testWickedLongTerm()
+        {
+            RAMDirectory dir = new RAMDirectory();
+            IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT)));
+
+            // Fix: the converted code filled a char[] with the Java-only Arrays.fill; the
+            // String(char, int) constructor builds the same run of 'x' characters directly.
+            string bigTerm = new string('x', IndexWriter.MAX_TERM_LENGTH);
+            Document doc = new Document();
+
+            // This produces a too-long term:
+            string contents = "abc xyz x" + bigTerm + " another term";
+            doc.add(new TextField("content", contents, Field.Store.NO));
+            writer.addDocument(doc);
+
+            // Make sure we can add another normal document
+            doc = new Document();
+            doc.add(new TextField("content", "abc bbb ccc", Field.Store.NO));
+            writer.addDocument(doc);
+            writer.close();
+
+            IndexReader reader = IndexReader.open(dir);
+
+            // Make sure all terms < max size were indexed
+            assertEquals(2, reader.docFreq(new Term("content", "abc")));
+            assertEquals(1, reader.docFreq(new Term("content", "bbb")));
+            assertEquals(1, reader.docFreq(new Term("content", "term")));
+            assertEquals(1, reader.docFreq(new Term("content", "another")));
+
+            // Make sure position is still incremented when
+            // massive term is skipped:
+            DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), "content", new BytesRef("another"));
+            assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+            assertEquals(1, tps.freq());
+            assertEquals(3, tps.nextPosition());
+
+            // Make sure the doc that has the massive term is in
+            // the index (failure message reworded; it read "should is not in the index"):
+            assertEquals("document with wicked long term is not in the index!", 2, reader.numDocs());
+
+            reader.close();
+
+            // Make sure we can add a document with exactly the
+            // maximum length term, and search on that term:
+            doc = new Document();
+            doc.add(new TextField("content", bigTerm, Field.Store.NO));
+            ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
+            sa.MaxTokenLength = 100000;
+            writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
+            writer.addDocument(doc);
+            writer.close();
+            reader = IndexReader.open(dir);
+            assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
+            reader.close();
+
+            dir.close();
+        }
+
+        /// <summary>
+        /// Blasts random strings through a fresh ClassicAnalyzer, 1000 * RANDOM_MULTIPLIER
+        /// iterations.
+        /// </summary>
+        public virtual void testRandomStrings()
+        {
+            Analyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);
+            checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
+        }
+
+        /// <summary>
+        /// Blasts random large strings through a fresh ClassicAnalyzer, 100 * RANDOM_MULTIPLIER
+        /// iterations, passing 8192 as the last checkRandomData argument.
+        /// </summary>
+        public virtual void testRandomHugeStrings()
+        {
+            // Fix: the local was named "random", which hides the random() method inside this
+            // body, so "Random random = random();" cannot bind to the method in C#.
+            Random rnd = random();
+            checkRandomData(rnd, new ClassicAnalyzer(TEST_VERSION_CURRENT), 100 * RANDOM_MULTIPLIER, 8192);
+        }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestDuelingAnalyzers.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestDuelingAnalyzers.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestDuelingAnalyzers.cs
new file mode 100644
index 0000000..6155918
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestDuelingAnalyzers.cs
@@ -0,0 +1,302 @@
+using System;
+
+namespace org.apache.lucene.analysis.core
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+ using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+ using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+ using LuceneTestCase = org.apache.lucene.util.LuceneTestCase;
+ using TestUtil = org.apache.lucene.util.TestUtil;
+ using Automaton = org.apache.lucene.util.automaton.Automaton;
+ using BasicOperations = org.apache.lucene.util.automaton.BasicOperations;
+ using CharacterRunAutomaton = org.apache.lucene.util.automaton.CharacterRunAutomaton;
+ using State = org.apache.lucene.util.automaton.State;
+ using Transition = org.apache.lucene.util.automaton.Transition;
+
+ /// <summary>
+ /// Compares MockTokenizer (which is simple with no optimizations) with equivalent
+ /// core tokenizers (that have optimizations like buffering).
+ ///
+ /// Any tests here probably need to consider the Unicode version of the JRE (it could
+ /// cause false failures).
+ /// </summary>
+ public class TestDuelingAnalyzers : LuceneTestCase
+ {
+ private CharacterRunAutomaton jvmLetter;
+
+ /// <summary>
+ /// Builds <c>jvmLetter</c>: a CharacterRunAutomaton over the repetition of a one-transition
+ /// automaton that has a transition for every code point the runtime classifies as a letter.
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void setUp() throws Exception
+ public override void setUp()
+ {
+     base.setUp();
+     // build an automaton matching this runtime's letter definition
+     State initial = new State();
+     State accept = new State();
+     accept.Accept = true;
+     for (int codePoint = 0; codePoint <= 0x10FFFF; codePoint++)
+     {
+         // char.IsLetter has no overload that takes an int code point. Lone surrogates
+         // (U+D800..U+DFFF) are rejected by char.ConvertFromUtf32 and are never letters,
+         // so they are skipped before the conversion.
+         if (codePoint >= 0xD800 && codePoint <= 0xDFFF)
+         {
+             continue;
+         }
+         if (char.IsLetter(char.ConvertFromUtf32(codePoint), 0))
+         {
+             initial.addTransition(new Transition(codePoint, codePoint, accept));
+         }
+     }
+     Automaton single = new Automaton(initial);
+     single.reduce();
+     Automaton repeat = BasicOperations.repeat(single);
+     jvmLetter = new CharacterRunAutomaton(repeat);
+ }
+
+ /// <summary>
+ /// Feeds 1000 strings from TestUtil.randomSimpleString to a MockAnalyzer built on
+ /// <c>jvmLetter</c> and to a LetterTokenizer-backed analyzer, and checks with
+ /// assertEquals that both token streams agree.
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLetterAscii() throws Exception
+ public virtual void testLetterAscii()
+ {
+     // Not named 'random': such a local would hide the random() helper inside this block.
+     Random rnd = random();
+     Analyzer left = new MockAnalyzer(rnd, jvmLetter, false);
+     Analyzer right = new AnalyzerAnonymousInnerClassHelper(this);
+     for (int i = 0; i < 1000; i++)
+     {
+         string s = TestUtil.randomSimpleString(rnd);
+         assertEquals(s, left.tokenStream("foo", newStringReader(s)), right.tokenStream("foo", newStringReader(s)));
+     }
+ }
+
+ /// <summary>
+ /// Analyzer whose components consist of a single LetterTokenizer.
+ /// (Presumably the converter's stand-in for an anonymous Analyzer subclass in the Java test.)
+ /// </summary>
+ private class AnalyzerAnonymousInnerClassHelper : Analyzer
+ {
+     // Enclosing test instance; stored by the constructor and not read inside this class.
+     private readonly TestDuelingAnalyzers outerInstance;
+
+     public AnalyzerAnonymousInnerClassHelper(TestDuelingAnalyzers outerInstance)
+     {
+         this.outerInstance = outerInstance;
+     }
+
+     /// <summary>
+     /// Wraps <paramref name="reader"/> in a LetterTokenizer and passes that tokenizer as
+     /// both arguments of the returned TokenStreamComponents.
+     /// </summary>
+     protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+     {
+         Tokenizer letters = new LetterTokenizer(TEST_VERSION_CURRENT, reader);
+         return new TokenStreamComponents(letters, letters);
+     }
+ }
+
+ /// <summary>
+ /// Same duel as testLetterAscii, but with strings from
+ /// TestUtil.randomSimpleString(rnd, 8192), a MockAnalyzer capped at 255 chars per token,
+ /// and atLeast(50) iterations.
+ /// </summary>
+ // not so useful since its all one token?!
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLetterAsciiHuge() throws Exception
+ public virtual void testLetterAsciiHuge()
+ {
+     // Not named 'random': such a local would hide the random() helper inside this block.
+     Random rnd = random();
+     int maxLength = 8192; // CharTokenizer.IO_BUFFER_SIZE*2
+     MockAnalyzer left = new MockAnalyzer(rnd, jvmLetter, false);
+     left.MaxTokenLength = 255; // match CharTokenizer's max token length
+     Analyzer right = new AnalyzerAnonymousInnerClassHelper2(this);
+     int numIterations = atLeast(50);
+     for (int i = 0; i < numIterations; i++)
+     {
+         string s = TestUtil.randomSimpleString(rnd, maxLength);
+         assertEquals(s, left.tokenStream("foo", newStringReader(s)), right.tokenStream("foo", newStringReader(s)));
+     }
+ }
+
+ /// <summary>
+ /// Analyzer whose components consist of a single LetterTokenizer; used as the
+ /// right-hand side of testLetterAsciiHuge.
+ /// </summary>
+ private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+ {
+     // Enclosing test instance; stored by the constructor and not read inside this class.
+     private readonly TestDuelingAnalyzers outerInstance;
+
+     public AnalyzerAnonymousInnerClassHelper2(TestDuelingAnalyzers outerInstance)
+     {
+         this.outerInstance = outerInstance;
+     }
+
+     /// <summary>
+     /// Wraps <paramref name="reader"/> in a LetterTokenizer and passes that tokenizer as
+     /// both arguments of the returned TokenStreamComponents.
+     /// </summary>
+     protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+     {
+         Tokenizer letters = new LetterTokenizer(TEST_VERSION_CURRENT, reader);
+         return new TokenStreamComponents(letters, letters);
+     }
+ }
+
+ /// <summary>
+ /// Feeds 1000 strings from TestUtil.randomHtmlishString(rnd, 20) to a MockAnalyzer built
+ /// on <c>jvmLetter</c> and to a LetterTokenizer-backed analyzer, and checks with
+ /// assertEquals that both token streams agree.
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLetterHtmlish() throws Exception
+ public virtual void testLetterHtmlish()
+ {
+     // Not named 'random': such a local would hide the random() helper inside this block.
+     Random rnd = random();
+     Analyzer left = new MockAnalyzer(rnd, jvmLetter, false);
+     Analyzer right = new AnalyzerAnonymousInnerClassHelper3(this);
+     for (int i = 0; i < 1000; i++)
+     {
+         string s = TestUtil.randomHtmlishString(rnd, 20);
+         assertEquals(s, left.tokenStream("foo", newStringReader(s)), right.tokenStream("foo", newStringReader(s)));
+     }
+ }
+
+ /// <summary>
+ /// Analyzer whose components consist of a single LetterTokenizer; used as the
+ /// right-hand side of testLetterHtmlish.
+ /// </summary>
+ private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+ {
+     // Enclosing test instance; stored by the constructor and not read inside this class.
+     private readonly TestDuelingAnalyzers outerInstance;
+
+     public AnalyzerAnonymousInnerClassHelper3(TestDuelingAnalyzers outerInstance)
+     {
+         this.outerInstance = outerInstance;
+     }
+
+     /// <summary>
+     /// Wraps <paramref name="reader"/> in a LetterTokenizer and passes that tokenizer as
+     /// both arguments of the returned TokenStreamComponents.
+     /// </summary>
+     protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+     {
+         Tokenizer letters = new LetterTokenizer(TEST_VERSION_CURRENT, reader);
+         return new TokenStreamComponents(letters, letters);
+     }
+ }
+
+ /// <summary>
+ /// Same duel as testLetterHtmlish, but with strings from
+ /// TestUtil.randomHtmlishString(rnd, 1024), a MockAnalyzer capped at 255 chars per token,
+ /// and atLeast(50) iterations.
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLetterHtmlishHuge() throws Exception
+ public virtual void testLetterHtmlishHuge()
+ {
+     // Not named 'random': such a local would hide the random() helper inside this block.
+     Random rnd = random();
+     int maxLength = 1024; // this is number of elements, not chars!
+     MockAnalyzer left = new MockAnalyzer(rnd, jvmLetter, false);
+     left.MaxTokenLength = 255; // match CharTokenizer's max token length
+     Analyzer right = new AnalyzerAnonymousInnerClassHelper4(this);
+     int numIterations = atLeast(50);
+     for (int i = 0; i < numIterations; i++)
+     {
+         string s = TestUtil.randomHtmlishString(rnd, maxLength);
+         assertEquals(s, left.tokenStream("foo", newStringReader(s)), right.tokenStream("foo", newStringReader(s)));
+     }
+ }
+
+ /// <summary>
+ /// Analyzer whose components consist of a single LetterTokenizer; used as the
+ /// right-hand side of testLetterHtmlishHuge.
+ /// </summary>
+ private class AnalyzerAnonymousInnerClassHelper4 : Analyzer
+ {
+     // Enclosing test instance; stored by the constructor and not read inside this class.
+     private readonly TestDuelingAnalyzers outerInstance;
+
+     public AnalyzerAnonymousInnerClassHelper4(TestDuelingAnalyzers outerInstance)
+     {
+         this.outerInstance = outerInstance;
+     }
+
+     /// <summary>
+     /// Wraps <paramref name="reader"/> in a LetterTokenizer and passes that tokenizer as
+     /// both arguments of the returned TokenStreamComponents.
+     /// </summary>
+     protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+     {
+         Tokenizer letters = new LetterTokenizer(TEST_VERSION_CURRENT, reader);
+         return new TokenStreamComponents(letters, letters);
+     }
+ }
+
+ /// <summary>
+ /// Feeds 1000 strings from TestUtil.randomUnicodeString to a MockAnalyzer built on
+ /// <c>jvmLetter</c> and to a LetterTokenizer-backed analyzer, and checks with
+ /// assertEquals that both token streams agree.
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLetterUnicode() throws Exception
+ public virtual void testLetterUnicode()
+ {
+     // Not named 'random': such a local would hide the random() helper inside this block.
+     Random rnd = random();
+     // Reuse the local (as the sibling tests do) instead of calling random() a second time.
+     Analyzer left = new MockAnalyzer(rnd, jvmLetter, false);
+     Analyzer right = new AnalyzerAnonymousInnerClassHelper5(this);
+     for (int i = 0; i < 1000; i++)
+     {
+         string s = TestUtil.randomUnicodeString(rnd);
+         assertEquals(s, left.tokenStream("foo", newStringReader(s)), right.tokenStream("foo", newStringReader(s)));
+     }
+ }
+
+ /// <summary>
+ /// Analyzer whose components consist of a single LetterTokenizer; used as the
+ /// right-hand side of testLetterUnicode.
+ /// </summary>
+ private class AnalyzerAnonymousInnerClassHelper5 : Analyzer
+ {
+     // Enclosing test instance; stored by the constructor and not read inside this class.
+     private readonly TestDuelingAnalyzers outerInstance;
+
+     public AnalyzerAnonymousInnerClassHelper5(TestDuelingAnalyzers outerInstance)
+     {
+         this.outerInstance = outerInstance;
+     }
+
+     /// <summary>
+     /// Wraps <paramref name="reader"/> in a LetterTokenizer and passes that tokenizer as
+     /// both arguments of the returned TokenStreamComponents.
+     /// </summary>
+     protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+     {
+         Tokenizer letters = new LetterTokenizer(TEST_VERSION_CURRENT, reader);
+         return new TokenStreamComponents(letters, letters);
+     }
+ }
+
+ /// <summary>
+ /// Same duel as testLetterUnicode, but with strings from
+ /// TestUtil.randomUnicodeString(rnd, 4300), a MockAnalyzer capped at 255 chars per token,
+ /// and atLeast(50) iterations.
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLetterUnicodeHuge() throws Exception
+ public virtual void testLetterUnicodeHuge()
+ {
+     // Not named 'random': such a local would hide the random() helper inside this block.
+     Random rnd = random();
+     int maxLength = 4300; // CharTokenizer.IO_BUFFER_SIZE + fudge
+     MockAnalyzer left = new MockAnalyzer(rnd, jvmLetter, false);
+     left.MaxTokenLength = 255; // match CharTokenizer's max token length
+     Analyzer right = new AnalyzerAnonymousInnerClassHelper6(this);
+     int numIterations = atLeast(50);
+     for (int i = 0; i < numIterations; i++)
+     {
+         string s = TestUtil.randomUnicodeString(rnd, maxLength);
+         assertEquals(s, left.tokenStream("foo", newStringReader(s)), right.tokenStream("foo", newStringReader(s)));
+     }
+ }
+
+ /// <summary>
+ /// Analyzer whose components consist of a single LetterTokenizer; used as the
+ /// right-hand side of testLetterUnicodeHuge.
+ /// </summary>
+ private class AnalyzerAnonymousInnerClassHelper6 : Analyzer
+ {
+     // Enclosing test instance; stored by the constructor and not read inside this class.
+     private readonly TestDuelingAnalyzers outerInstance;
+
+     public AnalyzerAnonymousInnerClassHelper6(TestDuelingAnalyzers outerInstance)
+     {
+         this.outerInstance = outerInstance;
+     }
+
+     /// <summary>
+     /// Wraps <paramref name="reader"/> in a LetterTokenizer and passes that tokenizer as
+     /// both arguments of the returned TokenStreamComponents.
+     /// </summary>
+     protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+     {
+         Tokenizer letters = new LetterTokenizer(TEST_VERSION_CURRENT, reader);
+         return new TokenStreamComponents(letters, letters);
+     }
+ }
+
+ /// <summary>
+ /// Walks both token streams in lock step and asserts that they yield the same number of
+ /// tokens with equal term text, position increment, start offset and end offset, and
+ /// that the end offsets still match after end() was called on both streams.
+ /// Both streams are reset at the start and closed at the end of this method.
+ /// </summary>
+ /// <param name="s">the analyzed input; appended to every failure message</param>
+ /// <param name="left">first stream to compare</param>
+ /// <param name="right">second stream to compare</param>
+ // we only check a few core attributes here.
+ // TODO: test other things
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void assertEquals(String s, org.apache.lucene.analysis.TokenStream left, org.apache.lucene.analysis.TokenStream right) throws Exception
+ public virtual void assertEquals(string s, TokenStream left, TokenStream right)
+ {
+     left.reset();
+     right.reset();
+
+     CharTermAttribute termL = left.addAttribute(typeof(CharTermAttribute));
+     CharTermAttribute termR = right.addAttribute(typeof(CharTermAttribute));
+     OffsetAttribute offsetL = left.addAttribute(typeof(OffsetAttribute));
+     OffsetAttribute offsetR = right.addAttribute(typeof(OffsetAttribute));
+     PositionIncrementAttribute posIncL = left.addAttribute(typeof(PositionIncrementAttribute));
+     PositionIncrementAttribute posIncR = right.addAttribute(typeof(PositionIncrementAttribute));
+
+     // the same message is used when either side runs out of tokens first
+     string tokenCountMessage = "wrong number of tokens for input: " + s;
+
+     while (left.incrementToken())
+     {
+         assertTrue(tokenCountMessage, right.incrementToken());
+         assertEquals("wrong term text for input: " + s, termL.ToString(), termR.ToString());
+         assertEquals("wrong position for input: " + s, posIncL.PositionIncrement, posIncR.PositionIncrement);
+         assertEquals("wrong start offset for input: " + s, offsetL.startOffset(), offsetR.startOffset());
+         assertEquals("wrong end offset for input: " + s, offsetL.endOffset(), offsetR.endOffset());
+     }
+     // left is exhausted, so right must be exhausted too
+     assertFalse(tokenCountMessage, right.incrementToken());
+
+     left.end();
+     right.end();
+     assertEquals("wrong final offset for input: " + s, offsetL.endOffset(), offsetR.endOffset());
+
+     left.close();
+     right.close();
+ }
+
+ /// <summary>
+ /// Creates a StringReader over <paramref name="s"/> and, when nextBoolean() on the
+ /// test's Random returns true, wraps it in a MockReaderWrapper.
+ /// </summary>
+ /// <param name="s">text the reader should supply</param>
+ /// <returns>the plain or the wrapped reader</returns>
+ // TODO: maybe push this out to TestUtil or LuceneTestCase and always use it instead?
+ private static Reader newStringReader(string s)
+ {
+     // Not named 'random': such a local would hide the random() helper inside this block.
+     Random rnd = random();
+     Reader r = new StringReader(s);
+     if (rnd.nextBoolean())
+     {
+         r = new MockReaderWrapper(rnd, r);
+     }
+     return r;
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestFactories.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestFactories.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestFactories.cs
new file mode 100644
index 0000000..8af7962
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestFactories.cs
@@ -0,0 +1,263 @@
+using System;
+using System.Diagnostics;
+using System.Collections;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.core
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using AbstractAnalysisFactory = org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+ using CharFilterFactory = org.apache.lucene.analysis.util.CharFilterFactory;
+ using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent;
+ using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
+ using StringMockResourceLoader = org.apache.lucene.analysis.util.StringMockResourceLoader;
+ using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+ using TokenizerFactory = org.apache.lucene.analysis.util.TokenizerFactory;
+ using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
+
+ /// <summary>
+ /// Sanity check some things about all factories:
+ /// we do our best to see if we can sanely initialize each one with
+ /// no parameters, smoke test it, etc.
+ /// </summary>
+ // TODO: move this, TestRandomChains, and TestAllAnalyzersHaveFactories
+ // to an integration test module that sucks in all analysis modules.
+ // currently the only way to do this is via eclipse etc (LUCENE-3974)
+ public class TestFactories : BaseTokenStreamTestCase
+ {
+ /// <summary>
+ /// Runs the per-factory check for every name returned by
+ /// TokenizerFactory.availableTokenizers(), then TokenFilterFactory.availableTokenFilters(),
+ /// then CharFilterFactory.availableCharFilters().
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void test() throws java.io.IOException
+ public virtual void test()
+ {
+     // tokenizers first ...
+     foreach (string tokenizerName in TokenizerFactory.availableTokenizers())
+     {
+         doTestTokenizer(tokenizerName);
+     }
+
+     // ... then token filters ...
+     foreach (string filterName in TokenFilterFactory.availableTokenFilters())
+     {
+         doTestTokenFilter(filterName);
+     }
+
+     // ... and finally char filters
+     foreach (string charFilterName in CharFilterFactory.availableCharFilters())
+     {
+         doTestCharFilter(charFilterName);
+     }
+ }
+
+ /// <summary>
+ /// Looks up the tokenizer factory class for <paramref name="tokenizer"/>, tries to create it
+ /// through initialize, and if an instance comes back sanity checks it and runs
+ /// checkRandomData over an analyzer built from it.
+ /// </summary>
+ /// <param name="tokenizer">name passed to TokenizerFactory.lookupClass</param>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void doTestTokenizer(String tokenizer) throws java.io.IOException
+ private void doTestTokenizer(string tokenizer)
+ {
+     // Java declared this as Class<? extends TokenizerFactory>; System.Type is not generic,
+     // so the plain Type is used.
+     Type factoryClazz = TokenizerFactory.lookupClass(tokenizer);
+     TokenizerFactory factory = (TokenizerFactory) initialize(factoryClazz);
+     if (factory == null)
+     {
+         // initialize could not create an instance; nothing further to check
+         return;
+     }
+
+     // we managed to fully create an instance. check a few more things:
+
+     // if it implements MultiTermAware, sanity check its impl
+     if (factory is MultiTermAwareComponent)
+     {
+         AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).MultiTermComponent;
+         assertNotNull(mtc);
+         // its not ok to return e.g. a charfilter here: but a tokenizer could wrap a filter around it
+         assertFalse(mtc is CharFilterFactory);
+     }
+
+     // beast it just a little, it shouldnt throw exceptions:
+     // (it should have thrown them in initialize)
+     checkRandomData(random(), new FactoryAnalyzer(factory, null, null), 100, 20, false, false);
+ }
+
+ /// <summary>
+ /// Looks up the token filter factory class for <paramref name="tokenfilter"/>, tries to create
+ /// it through initialize, and if an instance comes back sanity checks it and runs
+ /// checkRandomData over an analyzer that applies it after <c>assertingTokenizer</c>.
+ /// </summary>
+ /// <param name="tokenfilter">name passed to TokenFilterFactory.lookupClass</param>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void doTestTokenFilter(String tokenfilter) throws java.io.IOException
+ private void doTestTokenFilter(string tokenfilter)
+ {
+     // Java declared this as Class<? extends TokenFilterFactory>; System.Type is not generic,
+     // so the plain Type is used.
+     Type factoryClazz = TokenFilterFactory.lookupClass(tokenfilter);
+     TokenFilterFactory factory = (TokenFilterFactory) initialize(factoryClazz);
+     if (factory == null)
+     {
+         // initialize could not create an instance; nothing further to check
+         return;
+     }
+
+     // we managed to fully create an instance. check a few more things:
+
+     // if it implements MultiTermAware, sanity check its impl
+     if (factory is MultiTermAwareComponent)
+     {
+         AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).MultiTermComponent;
+         assertNotNull(mtc);
+         // its not ok to return a charfilter or tokenizer here, this makes no sense
+         assertTrue(mtc is TokenFilterFactory);
+     }
+
+     // beast it just a little, it shouldnt throw exceptions:
+     // (it should have thrown them in initialize)
+     checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, factory, null), 100, 20, false, false);
+ }
+
+ /// <summary>
+ /// Looks up the char filter factory class for <paramref name="charfilter"/>, tries to create
+ /// it through initialize, and if an instance comes back sanity checks it and runs
+ /// checkRandomData over an analyzer that applies it in front of <c>assertingTokenizer</c>.
+ /// </summary>
+ /// <param name="charfilter">name passed to CharFilterFactory.lookupClass</param>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void doTestCharFilter(String charfilter) throws java.io.IOException
+ private void doTestCharFilter(string charfilter)
+ {
+     // Java declared this as Class<? extends CharFilterFactory>; System.Type is not generic,
+     // so the plain Type is used.
+     Type factoryClazz = CharFilterFactory.lookupClass(charfilter);
+     CharFilterFactory factory = (CharFilterFactory) initialize(factoryClazz);
+     if (factory == null)
+     {
+         // initialize could not create an instance; nothing further to check
+         return;
+     }
+
+     // we managed to fully create an instance. check a few more things:
+
+     // if it implements MultiTermAware, sanity check its impl
+     if (factory is MultiTermAwareComponent)
+     {
+         AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).MultiTermComponent;
+         assertNotNull(mtc);
+         // its not ok to return a tokenizer or tokenfilter here, this makes no sense
+         assertTrue(mtc is CharFilterFactory);
+     }
+
+     // beast it just a little, it shouldnt throw exceptions:
+     // (it should have thrown them in initialize)
+     checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, null, factory), 100, 20, false, false);
+ }
+
+ /// <summary>
+ /// Tries to initialize a factory with no arguments other than <c>luceneMatchVersion</c>.
+ /// </summary>
+ /// <param name="factoryClazz">the factory type to instantiate</param>
+ /// <returns>
+ /// the new factory, or null when its constructor failed (an ArgumentException raised by
+ /// the constructor is the expected case: required parameters were not supplied)
+ /// </returns>
+ /// <exception cref="Exception">
+ /// if the type has no matching constructor, or the constructor cannot be invoked
+ /// </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private org.apache.lucene.analysis.util.AbstractAnalysisFactory initialize(Class<? extends org.apache.lucene.analysis.util.AbstractAnalysisFactory> factoryClazz) throws java.io.IOException
+ private AbstractAnalysisFactory initialize(Type factoryClazz)
+ {
+     IDictionary<string, string> args = new Dictionary<string, string>();
+     args["luceneMatchVersion"] = TEST_VERSION_CURRENT.ToString();
+
+     // NOTE(review): assumes every factory exposes a public constructor taking
+     // IDictionary<string, string> - confirm against AbstractAnalysisFactory.
+     System.Reflection.ConstructorInfo ctor;
+     try
+     {
+         ctor = factoryClazz.GetConstructor(new Type[] { typeof(IDictionary<string, string>) });
+     }
+     catch (Exception)
+     {
+         ctor = null;
+     }
+     // Type.GetConstructor signals "no such constructor" by returning null, not by throwing.
+     if (ctor == null)
+     {
+         throw new Exception("factory '" + factoryClazz + "' does not have a proper ctor!");
+     }
+
+     AbstractAnalysisFactory factory = null;
+     try
+     {
+         factory = (AbstractAnalysisFactory) ctor.Invoke(new object[] { args });
+     }
+     catch (MemberAccessException e)
+     {
+         // abstract type or inaccessible constructor (.NET counterpart of
+         // InstantiationException / IllegalAccessException)
+         throw new Exception(e.ToString(), e);
+     }
+     catch (System.Reflection.TargetInvocationException e)
+     {
+         if (e.InnerException is ArgumentException)
+         {
+             // its ok if we dont provide the right parameters to throw this
+             return null;
+         }
+         // any other constructor failure is ignored exactly as before: factory stays null
+     }
+
+     if (factory is ResourceLoaderAware)
+     {
+         try
+         {
+             ((ResourceLoaderAware) factory).inform(new StringMockResourceLoader(""));
+         }
+         catch (System.IO.IOException)
+         {
+             // its ok if the right files arent available or whatever to throw this
+         }
+         catch (ArgumentException)
+         {
+             // is this ok? I guess so
+         }
+     }
+     return factory;
+ }
+
+ // some silly classes just so we can use checkRandomData
+ private TokenizerFactory assertingTokenizer = new TokenizerFactoryAnonymousInnerClassHelper(new Dictionary<string, string>());
+
+ /// <summary>
+ /// TokenizerFactory whose create method returns a MockTokenizer over the given input.
+ /// </summary>
+ private class TokenizerFactoryAnonymousInnerClassHelper : TokenizerFactory
+ {
+     /// <summary>
+     /// Forwards <paramref name="args"/> to the TokenizerFactory constructor.
+     /// NOTE(review): assumes the base constructor accepts IDictionary&lt;string, string&gt; - confirm.
+     /// </summary>
+     public TokenizerFactoryAnonymousInnerClassHelper(IDictionary<string, string> args) : base(args)
+     {
+     }
+
+     // NOTE(review): the return type is narrower than Tokenizer, as in the Java original;
+     // verify that the base create signature permits this override.
+     public override MockTokenizer create(AttributeFactory factory, Reader input)
+     {
+         return new MockTokenizer(factory, input);
+     }
+ }
+
+ /// <summary>
+ /// Analyzer assembled from a tokenizer factory plus an optional token filter factory
+ /// and an optional char filter factory.
+ /// </summary>
+ private class FactoryAnalyzer : Analyzer
+ {
+     internal readonly TokenizerFactory tokenizer;
+     internal readonly CharFilterFactory charFilter;
+     internal readonly TokenFilterFactory tokenfilter;
+
+     /// <summary>
+     /// Stores the three factories; <paramref name="tokenizer"/> is asserted to be non-null,
+     /// the other two may be null.
+     /// </summary>
+     internal FactoryAnalyzer(TokenizerFactory tokenizer, TokenFilterFactory tokenfilter, CharFilterFactory charFilter)
+     {
+         Debug.Assert(tokenizer != null);
+         this.tokenizer = tokenizer;
+         this.charFilter = charFilter;
+         this.tokenfilter = tokenfilter;
+     }
+
+     /// <summary>
+     /// Creates the tokenizer from the factory and, when a token filter factory was given,
+     /// wraps the tokenizer with the filter it creates.
+     /// </summary>
+     protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+     {
+         Tokenizer source = tokenizer.create(reader);
+         if (tokenfilter == null)
+         {
+             return new TokenStreamComponents(source);
+         }
+         return new TokenStreamComponents(source, tokenfilter.create(source));
+     }
+
+     /// <summary>
+     /// Returns <paramref name="reader"/> unchanged when no char filter factory was given,
+     /// otherwise the result of passing it to that factory's create method.
+     /// </summary>
+     protected internal override Reader initReader(string fieldName, Reader reader)
+     {
+         if (charFilter == null)
+         {
+             return reader;
+         }
+         return charFilter.create(reader);
+     }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestKeywordAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestKeywordAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestKeywordAnalyzer.cs
new file mode 100644
index 0000000..ecde6df
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestKeywordAnalyzer.cs
@@ -0,0 +1,143 @@
+namespace org.apache.lucene.analysis.core
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+ using Document = org.apache.lucene.document.Document;
+ using Field = org.apache.lucene.document.Field;
+ using StringField = org.apache.lucene.document.StringField;
+ using TextField = org.apache.lucene.document.TextField;
+ using DirectoryReader = org.apache.lucene.index.DirectoryReader;
+ using DocsEnum = org.apache.lucene.index.DocsEnum;
+ using IndexReader = org.apache.lucene.index.IndexReader;
+ using IndexWriter = org.apache.lucene.index.IndexWriter;
+ using IndexWriterConfig = org.apache.lucene.index.IndexWriterConfig;
+ using MultiFields = org.apache.lucene.index.MultiFields;
+ using DocIdSetIterator = org.apache.lucene.search.DocIdSetIterator;
+ using IndexSearcher = org.apache.lucene.search.IndexSearcher;
+ using Directory = org.apache.lucene.store.Directory;
+ using RAMDirectory = org.apache.lucene.store.RAMDirectory;
+ using BytesRef = org.apache.lucene.util.BytesRef;
+ using IOUtils = org.apache.lucene.util.IOUtils;
+ using TestUtil = org.apache.lucene.util.TestUtil;
+
+ public class TestKeywordAnalyzer : BaseTokenStreamTestCase
+ {
+
+ private Directory directory;
+ private IndexSearcher searcher;
+ private IndexReader reader;
+
+ /// <summary>
+ /// Creates <c>directory</c>, indexes a single document into it with a SimpleAnalyzer
+ /// (StringField "partnum" = "Q36", TextField "description" = "Illidium Space Modulator"),
+ /// then opens <c>reader</c> and <c>searcher</c> on the result.
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void setUp() throws Exception
+ public override void setUp()
+ {
+     base.setUp();
+     directory = newDirectory();
+
+     IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new SimpleAnalyzer(TEST_VERSION_CURRENT));
+     IndexWriter indexWriter = new IndexWriter(directory, config);
+
+     Document part = new Document();
+     part.add(new StringField("partnum", "Q36", Field.Store.YES));
+     part.add(new TextField("description", "Illidium Space Modulator", Field.Store.YES));
+     indexWriter.addDocument(part);
+     indexWriter.close();
+
+     reader = DirectoryReader.open(directory);
+     searcher = newSearcher(reader);
+ }
+
+ /// <summary>
+ /// Closes <c>reader</c> and <c>directory</c> and then runs the base tearDown. Each later
+ /// step still runs when an earlier one throws, so a failing close cannot leave the
+ /// directory open or skip the base class cleanup.
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void tearDown() throws Exception
+ public override void tearDown()
+ {
+     try
+     {
+         reader.close();
+     }
+     finally
+     {
+         try
+         {
+             directory.close();
+         }
+         finally
+         {
+             base.tearDown();
+         }
+     }
+ }
+
+ /*
+ public void testPerFieldAnalyzer() throws Exception {
+ PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(TEST_VERSION_CURRENT));
+ analyzer.addAnalyzer("partnum", new KeywordAnalyzer());
+
+ QueryParser queryParser = new QueryParser(TEST_VERSION_CURRENT, "description", analyzer);
+ Query query = queryParser.parse("partnum:Q36 AND SPACE");
+
+ ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
+ assertEquals("Q36 kept as-is",
+ "+partnum:Q36 +space", query.toString("description"));
+ assertEquals("doc found!", 1, hits.length);
+ }
+ */
+
+ /// <summary>
+ /// Indexes two documents ("partnum" = "Q36" and "Q37") with a KeywordAnalyzer into a
+ /// RAMDirectory and asserts that a DocsEnum for each of the two terms yields a document.
+ /// The reader and the directory are closed before the method returns, even on failure.
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testMutipleDocument() throws Exception
+ public virtual void testMutipleDocument()
+ {
+     RAMDirectory dir = new RAMDirectory();
+     try
+     {
+         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new KeywordAnalyzer()));
+         Document doc = new Document();
+         doc.add(new TextField("partnum", "Q36", Field.Store.YES));
+         writer.addDocument(doc);
+         doc = new Document();
+         doc.add(new TextField("partnum", "Q37", Field.Store.YES));
+         writer.addDocument(doc);
+         writer.close();
+
+         // named differently from the 'reader' field so the two cannot be confused
+         IndexReader indexReader = DirectoryReader.open(dir);
+         try
+         {
+             DocsEnum td = TestUtil.docs(random(), indexReader, "partnum", new BytesRef("Q36"), MultiFields.getLiveDocs(indexReader), null, 0);
+             assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+             td = TestUtil.docs(random(), indexReader, "partnum", new BytesRef("Q37"), MultiFields.getLiveDocs(indexReader), null, 0);
+             assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+         }
+         finally
+         {
+             indexReader.close();
+         }
+     }
+     finally
+     {
+         dir.close();
+     }
+ }
+
+ /// <summary>
+ /// Analyzes "abcd" with a KeywordAnalyzer and asserts that exactly one token comes out,
+ /// with start offset 0 and end offset 4.
+ /// </summary>
+ // LUCENE-1441
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testOffsets() throws Exception
+ public virtual void testOffsets()
+ {
+     KeywordAnalyzer analyzer = new KeywordAnalyzer();
+     TokenStream ts = analyzer.tokenStream("field", new StringReader("abcd"));
+     try
+     {
+         OffsetAttribute offsets = ts.addAttribute(typeof(OffsetAttribute));
+         ts.reset();
+
+         // first and only token
+         assertTrue(ts.incrementToken());
+         assertEquals(0, offsets.startOffset());
+         assertEquals(4, offsets.endOffset());
+
+         // nothing after it
+         assertFalse(ts.incrementToken());
+         ts.end();
+     }
+     finally
+     {
+         IOUtils.closeWhileHandlingException(ts);
+     }
+ }
+
+ /// <summary>
+ /// Blasts random strings through a freshly built KeywordAnalyzer by handing it to
+ /// checkRandomData together with 1000 * RANDOM_MULTIPLIER as the third argument.
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomStrings() throws Exception
+ public virtual void testRandomStrings()
+ {
+     KeywordAnalyzer keywordAnalyzer = new KeywordAnalyzer();
+     checkRandomData(random(), keywordAnalyzer, 1000 * RANDOM_MULTIPLIER);
+ }
+ }
+
+}
\ No newline at end of file