Return-Path: X-Original-To: apmail-lucenenet-commits-archive@www.apache.org Delivered-To: apmail-lucenenet-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 950CA184AE for ; Thu, 10 Dec 2015 18:38:52 +0000 (UTC) Received: (qmail 31894 invoked by uid 500); 10 Dec 2015 18:38:52 -0000 Delivered-To: apmail-lucenenet-commits-archive@lucenenet.apache.org Received: (qmail 31805 invoked by uid 500); 10 Dec 2015 18:38:52 -0000 Mailing-List: contact commits-help@lucenenet.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: lucene-net-dev@lucenenet.apache.org Delivered-To: mailing list commits@lucenenet.apache.org Received: (qmail 31778 invoked by uid 99); 10 Dec 2015 18:38:52 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 10 Dec 2015 18:38:52 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 4F5C8E1790; Thu, 10 Dec 2015 18:38:52 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit From: synhershko@apache.org To: commits@lucenenet.apache.org Date: Thu, 10 Dec 2015 18:39:11 -0000 Message-Id: <44c8a0c20c1a4f7bb1e300a0a18a5a5e@git.apache.org> In-Reply-To: <82040c0d687b4700bc749a953375230f@git.apache.org> References: <82040c0d687b4700bc749a953375230f@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [22/27] lucenenet git commit: adding converted analysis common tests http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs new file 
mode 100644 index 0000000..73ccad8 --- /dev/null +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs @@ -0,0 +1,383 @@ +using System; +using System.Collections.Generic; + +namespace org.apache.lucene.analysis.core +{ + + + using MappingCharFilter = org.apache.lucene.analysis.charfilter.MappingCharFilter; + using NormalizeCharMap = org.apache.lucene.analysis.charfilter.NormalizeCharMap; + using CommonGramsFilter = org.apache.lucene.analysis.commongrams.CommonGramsFilter; + using WordDelimiterFilter = org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter; + using EdgeNGramTokenizer = org.apache.lucene.analysis.ngram.EdgeNGramTokenizer; + using NGramTokenFilter = org.apache.lucene.analysis.ngram.NGramTokenFilter; + using ShingleFilter = org.apache.lucene.analysis.shingle.ShingleFilter; + using CharArraySet = org.apache.lucene.analysis.util.CharArraySet; + using WikipediaTokenizer = org.apache.lucene.analysis.wikipedia.WikipediaTokenizer; + using SuppressCodecs = org.apache.lucene.util.LuceneTestCase.SuppressCodecs; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes: +//ORIGINAL LINE: @SuppressCodecs("Direct") public class TestBugInSomething extends org.apache.lucene.analysis.BaseTokenStreamTestCase + public class TestBugInSomething : BaseTokenStreamTestCase + { +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void test() throws Exception + public virtual void test() + { +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final org.apache.lucene.analysis.util.CharArraySet cas = new org.apache.lucene.analysis.util.CharArraySet(TEST_VERSION_CURRENT, 3, false); + CharArraySet cas = new CharArraySet(TEST_VERSION_CURRENT, 3, false); + cas.add("jjp"); + cas.add("wlmwoknt"); + cas.add("tcgyreo"); + +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final org.apache.lucene.analysis.charfilter.NormalizeCharMap.Builder builder = new org.apache.lucene.analysis.charfilter.NormalizeCharMap.Builder(); + NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder(); + builder.add("mtqlpi", ""); + builder.add("mwoknt", "jjp"); + builder.add("tcgyreo", "zpfpajyws"); +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final org.apache.lucene.analysis.charfilter.NormalizeCharMap map = builder.build(); + NormalizeCharMap map = builder.build(); + + Analyzer a = new AnalyzerAnonymousInnerClassHelper(this, cas, map); + checkAnalysisConsistency(random(), a, false, "wmgddzunizdomqyj"); + } + + private class AnalyzerAnonymousInnerClassHelper : Analyzer + { + private readonly TestBugInSomething outerInstance; + + private CharArraySet cas; + private NormalizeCharMap map; + + public AnalyzerAnonymousInnerClassHelper(TestBugInSomething outerInstance, CharArraySet cas, NormalizeCharMap map) + { + this.outerInstance = outerInstance; + this.cas = 
cas; + this.map = map; + } + + protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader) + { + Tokenizer t = new MockTokenizer(new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(reader), MockTokenFilter.ENGLISH_STOPSET, false, -65); + TokenFilter f = new CommonGramsFilter(TEST_VERSION_CURRENT, t, cas); + return new TokenStreamComponents(t, f); + } + + protected internal override Reader initReader(string fieldName, Reader reader) + { + reader = new MockCharFilter(reader, 0); + reader = new MappingCharFilter(map, reader); + return reader; + } + } + + internal CharFilter wrappedStream = new CharFilterAnonymousInnerClassHelper(new StringReader("bogus")); + + private class CharFilterAnonymousInnerClassHelper : CharFilter + { + public CharFilterAnonymousInnerClassHelper(StringReader java) : base(StringReader) + { + } + + + public override void mark(int readAheadLimit) + { + throw new System.NotSupportedException("mark(int)"); + } + + public override bool markSupported() + { + throw new System.NotSupportedException("markSupported()"); + } + + public override int read() + { + throw new System.NotSupportedException("read()"); + } + + public override int read(char[] cbuf) + { + throw new System.NotSupportedException("read(char[])"); + } + + public override int read(CharBuffer target) + { + throw new System.NotSupportedException("read(CharBuffer)"); + } + + public override bool ready() + { + throw new System.NotSupportedException("ready()"); + } + + public override void reset() + { + throw new System.NotSupportedException("reset()"); + } + + public override long skip(long n) + { + throw new System.NotSupportedException("skip(long)"); + } + + public override int correct(int currentOff) + { + throw new System.NotSupportedException("correct(int)"); + } + + public override void close() + { + throw new System.NotSupportedException("close()"); + } + + public override int read(char[] arg0, int arg1, int arg2) + { + throw new 
System.NotSupportedException("read(char[], int, int)"); + } + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testWrapping() throws Exception + public virtual void testWrapping() + { + CharFilter cs = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(wrappedStream); + try + { + cs.mark(1); + fail(); + } + catch (Exception e) + { + assertEquals("mark(int)", e.Message); + } + + try + { + cs.markSupported(); + fail(); + } + catch (Exception e) + { + assertEquals("markSupported()", e.Message); + } + + try + { + cs.read(); + fail(); + } + catch (Exception e) + { + assertEquals("read()", e.Message); + } + + try + { + cs.read(new char[0]); + fail(); + } + catch (Exception e) + { + assertEquals("read(char[])", e.Message); + } + + try + { + cs.read(CharBuffer.wrap(new char[0])); + fail(); + } + catch (Exception e) + { + assertEquals("read(CharBuffer)", e.Message); + } + + try + { + cs.reset(); + fail(); + } + catch (Exception e) + { + assertEquals("reset()", e.Message); + } + + try + { + cs.skip(1); + fail(); + } + catch (Exception e) + { + assertEquals("skip(long)", e.Message); + } + + try + { + cs.correctOffset(1); + fail(); + } + catch (Exception e) + { + assertEquals("correct(int)", e.Message); + } + + try + { + cs.close(); + fail(); + } + catch (Exception e) + { + assertEquals("close()", e.Message); + } + + try + { + cs.read(new char[0], 0, 0); + fail(); + } + catch (Exception e) + { + assertEquals("read(char[], int, int)", e.Message); + } + } + + // todo: test framework? 
+ + internal sealed class SopTokenFilter : TokenFilter + { + + internal SopTokenFilter(TokenStream input) : base(input) + { + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException + public override bool incrementToken() + { + if (input.incrementToken()) + { + Console.WriteLine(input.GetType().Name + "->" + this.reflectAsString(false)); + return true; + } + else + { + return false; + } + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: @Override public void end() throws java.io.IOException + public override void end() + { + base.end(); + Console.WriteLine(input.GetType().Name + ".end()"); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: @Override public void close() throws java.io.IOException + public override void close() + { + base.close(); + Console.WriteLine(input.GetType().Name + ".close()"); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: @Override public void reset() throws java.io.IOException + public override void reset() + { + base.reset(); + Console.WriteLine(input.GetType().Name + ".reset()"); + } + } + + // LUCENE-5269 +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testUnicodeShinglesAndNgrams() throws Exception + public virtual void testUnicodeShinglesAndNgrams() + { + Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this); + checkRandomData(random(), analyzer, 2000); + } + + private class AnalyzerAnonymousInnerClassHelper : Analyzer + { + private readonly TestBugInSomething outerInstance; + + public AnalyzerAnonymousInnerClassHelper(TestBugInSomething outerInstance) + { + this.outerInstance = outerInstance; + } + + protected internal override TokenStreamComponents 
createComponents(string fieldName, Reader reader) + { + Tokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, reader, 2, 94); + //TokenStream stream = new SopTokenFilter(tokenizer); + TokenStream stream = new ShingleFilter(tokenizer, 5); + //stream = new SopTokenFilter(stream); + stream = new NGramTokenFilter(TEST_VERSION_CURRENT, stream, 55, 83); + //stream = new SopTokenFilter(stream); + return new TokenStreamComponents(tokenizer, stream); + } + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testCuriousWikipediaString() throws Exception + public virtual void testCuriousWikipediaString() + { +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final org.apache.lucene.analysis.util.CharArraySet protWords = new org.apache.lucene.analysis.util.CharArraySet(TEST_VERSION_CURRENT, new java.util.HashSet<>(java.util.Arrays.asList("rrdpafa", "pupmmlu", "xlq", "dyy", "zqrxrrck", "o", "hsrlfvcha")), false); + CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<>(Arrays.asList("rrdpafa", "pupmmlu", "xlq", "dyy", "zqrxrrck", "o", "hsrlfvcha")), false); +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final byte table[] = new byte[] { -57, 26, 1, 48, 63, -23, 55, -84, 18, 120, -97, 103, 58, 13, 84, 89, 57, -13, -63, 5, 28, 97, -54, -94, 102, -108, -5, 5, 46, 40, 43, 78, 43, -72, 36, 29, 124, -106, -22, -51, 65, 5, 31, -42, 6, -99, 97, 14, 81, -128, 74, 100, 54, -55, -25, 53, -71, -98, 44, 33, 86, 106, -42, 47, 115, -89, -18, -26, 22, -95, -43, 83, -125, 105, -104, -24, 106, -16, 126, 115, -105, 97, 65, -33, 57, 44, -1, 123, -68, 100, 13, -41, -64, -119, 0, 92, 94, -36, 53, -9, -102, -18, 90, 94, -26, 31, 71, -20 }; + sbyte[] table = new sbyte[] {-57, 26, 1, 48, 63, -23, 55, -84, 18, 120, -97, 103, 58, 13, 84, 89, 57, -13, -63, 5, 28, 97, -54, -94, 102, -108, -5, 5, 46, 40, 43, 
78, 43, -72, 36, 29, 124, -106, -22, -51, 65, 5, 31, -42, 6, -99, 97, 14, 81, -128, 74, 100, 54, -55, -25, 53, -71, -98, 44, 33, 86, 106, -42, 47, 115, -89, -18, -26, 22, -95, -43, 83, -125, 105, -104, -24, 106, -16, 126, 115, -105, 97, 65, -33, 57, 44, -1, 123, -68, 100, 13, -41, -64, -119, 0, 92, 94, -36, 53, -9, -102, -18, 90, 94, -26, 31, 71, -20}; + Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this, protWords, table); + checkAnalysisConsistency(random(), a, false, "B\u28c3\ue0f8[ \ud800\udfc2

jb"); + } + + private class AnalyzerAnonymousInnerClassHelper2 : Analyzer + { + private readonly TestBugInSomething outerInstance; + + private CharArraySet protWords; + private sbyte[] table; + + public AnalyzerAnonymousInnerClassHelper2(TestBugInSomething outerInstance, CharArraySet protWords, sbyte[] table) + { + this.outerInstance = outerInstance; + this.protWords = protWords; + this.table = table; + } + + protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader) + { + Tokenizer tokenizer = new WikipediaTokenizer(reader); + TokenStream stream = new SopTokenFilter(tokenizer); + stream = new WordDelimiterFilter(TEST_VERSION_CURRENT, stream, table, -50, protWords); + stream = new SopTokenFilter(stream); + return new TokenStreamComponents(tokenizer, stream); + } + } + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestClassicAnalyzer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestClassicAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestClassicAnalyzer.cs new file mode 100644 index 0000000..9b3f425 --- /dev/null +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestClassicAnalyzer.cs @@ -0,0 +1,395 @@ +using System; + +namespace org.apache.lucene.analysis.core +{ + + using ClassicAnalyzer = org.apache.lucene.analysis.standard.ClassicAnalyzer; + using Document = org.apache.lucene.document.Document; + using Field = org.apache.lucene.document.Field; + using TextField = org.apache.lucene.document.TextField; + using DocsAndPositionsEnum = org.apache.lucene.index.DocsAndPositionsEnum; + using DocsEnum = org.apache.lucene.index.DocsEnum; + using IndexReader = org.apache.lucene.index.IndexReader; + using IndexWriter = org.apache.lucene.index.IndexWriter; + using IndexWriterConfig = 
org.apache.lucene.index.IndexWriterConfig; + using MultiFields = org.apache.lucene.index.MultiFields; + using Term = org.apache.lucene.index.Term; + using DocIdSetIterator = org.apache.lucene.search.DocIdSetIterator; + using RAMDirectory = org.apache.lucene.store.RAMDirectory; + using BytesRef = org.apache.lucene.util.BytesRef; + using Version = org.apache.lucene.util.Version; + + + + /// + /// Copyright 2004 The Apache Software Foundation + ///

+ /// Licensed under the Apache License, Version 2.0 (the "License"); + /// you may not use this file except in compliance with the License. + /// You may obtain a copy of the License at + ///

+ /// http://www.apache.org/licenses/LICENSE-2.0 + ///

+ /// Unless required by applicable law or agreed to in writing, software + /// distributed under the License is distributed on an "AS IS" BASIS, + /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + /// See the License for the specific language governing permissions and + /// limitations under the License. + ///

+ + public class TestClassicAnalyzer : BaseTokenStreamTestCase + { + + private Analyzer a = new ClassicAnalyzer(TEST_VERSION_CURRENT); + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testMaxTermLength() throws Exception + public virtual void testMaxTermLength() + { + ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT); + sa.MaxTokenLength = 5; + assertAnalyzesTo(sa, "ab cd toolong xy z", new string[]{"ab", "cd", "xy", "z"}); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testMaxTermLength2() throws Exception + public virtual void testMaxTermLength2() + { + ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT); + assertAnalyzesTo(sa, "ab cd toolong xy z", new string[]{"ab", "cd", "toolong", "xy", "z"}); + sa.MaxTokenLength = 5; + + assertAnalyzesTo(sa, "ab cd toolong xy z", new string[]{"ab", "cd", "xy", "z"}, new int[]{1, 1, 2, 1}); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testMaxTermLength3() throws Exception + public virtual void testMaxTermLength3() + { + char[] chars = new char[255]; + for (int i = 0;i < 255;i++) + { + chars[i] = 'a'; + } + string longTerm = new string(chars, 0, 255); + + assertAnalyzesTo(a, "ab cd " + longTerm + " xy z", new string[]{"ab", "cd", longTerm, "xy", "z"}); + assertAnalyzesTo(a, "ab cd " + longTerm + "a xy z", new string[]{"ab", "cd", "xy", "z"}); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testAlphanumeric() throws Exception + public virtual void testAlphanumeric() + { + // alphanumeric tokens + assertAnalyzesTo(a, "B2B", new string[]{"b2b"}); + assertAnalyzesTo(a, "2B", new string[]{"2b"}); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void 
testUnderscores() throws Exception + public virtual void testUnderscores() + { + // underscores are delimiters, but not in email addresses (below) + assertAnalyzesTo(a, "word_having_underscore", new string[]{"word", "having", "underscore"}); + assertAnalyzesTo(a, "word_with_underscore_and_stopwords", new string[]{"word", "underscore", "stopwords"}); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testDelimiters() throws Exception + public virtual void testDelimiters() + { + // other delimiters: "-", "/", "," + assertAnalyzesTo(a, "some-dashed-phrase", new string[]{"some", "dashed", "phrase"}); + assertAnalyzesTo(a, "dogs,chase,cats", new string[]{"dogs", "chase", "cats"}); + assertAnalyzesTo(a, "ac/dc", new string[]{"ac", "dc"}); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testApostrophes() throws Exception + public virtual void testApostrophes() + { + // internal apostrophes: O'Reilly, you're, O'Reilly's + // possessives are actually removed by StandardFilter, not the tokenizer + assertAnalyzesTo(a, "O'Reilly", new string[]{"o'reilly"}); + assertAnalyzesTo(a, "you're", new string[]{"you're"}); + assertAnalyzesTo(a, "she's", new string[]{"she"}); + assertAnalyzesTo(a, "Jim's", new string[]{"jim"}); + assertAnalyzesTo(a, "don't", new string[]{"don't"}); + assertAnalyzesTo(a, "O'Reilly's", new string[]{"o'reilly"}); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testTSADash() throws Exception + public virtual void testTSADash() + { + // t and s had been stopwords in Lucene <= 2.0, which made it impossible + // to correctly search for these terms: + assertAnalyzesTo(a, "s-class", new string[]{"s", "class"}); + assertAnalyzesTo(a, "t-com", new string[]{"t", "com"}); + // 'a' is still a stopword: + assertAnalyzesTo(a, "a-class", new string[]{"class"}); 
+ } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testCompanyNames() throws Exception + public virtual void testCompanyNames() + { + // company names + assertAnalyzesTo(a, "AT&T", new string[]{"at&t"}); + assertAnalyzesTo(a, "Excite@Home", new string[]{"excite@home"}); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testLucene1140() throws Exception + public virtual void testLucene1140() + { + try + { + ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT); + assertAnalyzesTo(analyzer, "www.nutch.org.", new string[]{"www.nutch.org"}, new string[] {""}); + } + catch (System.NullReferenceException) + { + fail("Should not throw an NPE and it did"); + } + + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testDomainNames() throws Exception + public virtual void testDomainNames() + { + // Current lucene should not show the bug + ClassicAnalyzer a2 = new ClassicAnalyzer(TEST_VERSION_CURRENT); + + // domain names + assertAnalyzesTo(a2, "www.nutch.org", new string[]{"www.nutch.org"}); + //Notice the trailing . See https://issues.apache.org/jira/browse/LUCENE-1068. + // the following should be recognized as HOST: + assertAnalyzesTo(a2, "www.nutch.org.", new string[]{"www.nutch.org"}, new string[] {""}); + + // 2.3 should show the bug. But, alas, it's obsolete, we don't support it. + // a2 = new ClassicAnalyzer(org.apache.lucene.util.Version.LUCENE_23); + // assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "wwwnutchorg" }, new String[] { "" }); + + // 2.4 should not show the bug. 
But, alas, it's also obsolete, + // so we check latest released (Robert's gonna break this on 4.0 soon :) ) + a2 = new ClassicAnalyzer(Version.LUCENE_31); + assertAnalyzesTo(a2, "www.nutch.org.", new string[]{"www.nutch.org"}, new string[] {""}); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testEMailAddresses() throws Exception + public virtual void testEMailAddresses() + { + // email addresses, possibly with underscores, periods, etc + assertAnalyzesTo(a, "test@example.com", new string[]{"test@example.com"}); + assertAnalyzesTo(a, "first.lastname@example.com", new string[]{"first.lastname@example.com"}); + assertAnalyzesTo(a, "first_lastname@example.com", new string[]{"first_lastname@example.com"}); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testNumeric() throws Exception + public virtual void testNumeric() + { + // floating point, serial, model numbers, ip addresses, etc. 
+ // every other segment must have at least one digit + assertAnalyzesTo(a, "21.35", new string[]{"21.35"}); + assertAnalyzesTo(a, "R2D2 C3PO", new string[]{"r2d2", "c3po"}); + assertAnalyzesTo(a, "216.239.63.104", new string[]{"216.239.63.104"}); + assertAnalyzesTo(a, "1-2-3", new string[]{"1-2-3"}); + assertAnalyzesTo(a, "a1-b2-c3", new string[]{"a1-b2-c3"}); + assertAnalyzesTo(a, "a1-b-c3", new string[]{"a1-b-c3"}); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testTextWithNumbers() throws Exception + public virtual void testTextWithNumbers() + { + // numbers + assertAnalyzesTo(a, "David has 5000 bones", new string[]{"david", "has", "5000", "bones"}); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testVariousText() throws Exception + public virtual void testVariousText() + { + // various + assertAnalyzesTo(a, "C embedded developers wanted", new string[]{"c", "embedded", "developers", "wanted"}); + assertAnalyzesTo(a, "foo bar FOO BAR", new string[]{"foo", "bar", "foo", "bar"}); + assertAnalyzesTo(a, "foo bar . FOO <> BAR", new string[]{"foo", "bar", "foo", "bar"}); + assertAnalyzesTo(a, "\"QUOTED\" word", new string[]{"quoted", "word"}); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testAcronyms() throws Exception + public virtual void testAcronyms() + { + // acronyms have their dots stripped + assertAnalyzesTo(a, "U.S.A.", new string[]{"usa"}); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testCPlusPlusHash() throws Exception + public virtual void testCPlusPlusHash() + { + // It would be nice to change the grammar in StandardTokenizer.jj to make "C#" and "C++" end up as tokens. 
+ assertAnalyzesTo(a, "C++", new string[]{"c"}); + assertAnalyzesTo(a, "C#", new string[]{"c"}); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testKorean() throws Exception + public virtual void testKorean() + { + // Korean words + assertAnalyzesTo(a, "안녕하세요 한글입니다", new string[]{"안녕하세요", "한글입니다"}); + } + + // Compliance with the "old" JavaCC-based analyzer, see: + // https://issues.apache.org/jira/browse/LUCENE-966#action_12516752 + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testComplianceFileName() throws Exception + public virtual void testComplianceFileName() + { + assertAnalyzesTo(a, "2004.jpg", new string[]{"2004.jpg"}, new string[]{""}); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testComplianceNumericIncorrect() throws Exception + public virtual void testComplianceNumericIncorrect() + { + assertAnalyzesTo(a, "62.46", new string[]{"62.46"}, new string[]{""}); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testComplianceNumericLong() throws Exception + public virtual void testComplianceNumericLong() + { + assertAnalyzesTo(a, "978-0-94045043-1", new string[]{"978-0-94045043-1"}, new string[]{""}); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testComplianceNumericFile() throws Exception + public virtual void testComplianceNumericFile() + { + assertAnalyzesTo(a, "78academyawards/rules/rule02.html", new string[]{"78academyawards/rules/rule02.html"}, new string[]{""}); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testComplianceNumericWithUnderscores() throws Exception + public virtual void testComplianceNumericWithUnderscores() + 
{ + assertAnalyzesTo(a, "2006-03-11t082958z_01_ban130523_rtridst_0_ozabs", new string[]{"2006-03-11t082958z_01_ban130523_rtridst_0_ozabs"}, new string[]{""}); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testComplianceNumericWithDash() throws Exception + public virtual void testComplianceNumericWithDash() + { + assertAnalyzesTo(a, "mid-20th", new string[]{"mid-20th"}, new string[]{""}); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testComplianceManyTokens() throws Exception + public virtual void testComplianceManyTokens() + { + assertAnalyzesTo(a, "/money.cnn.com/magazines/fortune/fortune_archive/2007/03/19/8402357/index.htm " + "safari-0-sheikh-zayed-grand-mosque.jpg", new string[]{"money.cnn.com", "magazines", "fortune", "fortune", "archive/2007/03/19/8402357", "index.htm", "safari-0-sheikh", "zayed", "grand", "mosque.jpg"}, new string[]{"", "", "", "", "", "", "", "", "", ""}); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testJava14BWCompatibility() throws Exception + public virtual void testJava14BWCompatibility() + { + ClassicAnalyzer sa = new ClassicAnalyzer(Version.LUCENE_30); + assertAnalyzesTo(sa, "test\u02C6test", new string[] {"test", "test"}); + } + + /// + /// Make sure we skip wicked long terms. 
+ /// +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testWickedLongTerm() throws java.io.IOException + public virtual void testWickedLongTerm() + { + RAMDirectory dir = new RAMDirectory(); + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT))); + + char[] chars = new char[IndexWriter.MAX_TERM_LENGTH]; + Arrays.fill(chars, 'x'); + Document doc = new Document(); +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final String bigTerm = new String(chars); + string bigTerm = new string(chars); + + // This produces a too-long term: + string contents = "abc xyz x" + bigTerm + " another term"; + doc.add(new TextField("content", contents, Field.Store.NO)); + writer.addDocument(doc); + + // Make sure we can add another normal document + doc = new Document(); + doc.add(new TextField("content", "abc bbb ccc", Field.Store.NO)); + writer.addDocument(doc); + writer.close(); + + IndexReader reader = IndexReader.open(dir); + + // Make sure all terms < max size were indexed + assertEquals(2, reader.docFreq(new Term("content", "abc"))); + assertEquals(1, reader.docFreq(new Term("content", "bbb"))); + assertEquals(1, reader.docFreq(new Term("content", "term"))); + assertEquals(1, reader.docFreq(new Term("content", "another"))); + + // Make sure position is still incremented when + // massive term is skipped: + DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), "content", new BytesRef("another")); + assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); + assertEquals(1, tps.freq()); + assertEquals(3, tps.nextPosition()); + + // Make sure the doc that has the massive term is in + // the index: + assertEquals("document with wicked long term should is not in the index!", 2, reader.numDocs()); + + reader.close(); + + // Make sure we can add a 
document with exactly the + // maximum length term, and search on that term: + doc = new Document(); + doc.add(new TextField("content", bigTerm, Field.Store.NO)); + ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT); + sa.MaxTokenLength = 100000; + writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa)); + writer.addDocument(doc); + writer.close(); + reader = IndexReader.open(dir); + assertEquals(1, reader.docFreq(new Term("content", bigTerm))); + reader.close(); + + dir.close(); + } + + /// + /// blast some random strings through the analyzer +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testRandomStrings() throws Exception + public virtual void testRandomStrings() + { + checkRandomData(random(), new ClassicAnalyzer(TEST_VERSION_CURRENT), 1000 * RANDOM_MULTIPLIER); + } + + /// + /// blast some random large strings through the analyzer +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testRandomHugeStrings() throws Exception + public virtual void testRandomHugeStrings() + { + Random random = random(); + checkRandomData(random, new ClassicAnalyzer(TEST_VERSION_CURRENT), 100 * RANDOM_MULTIPLIER, 8192); + } + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestDuelingAnalyzers.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestDuelingAnalyzers.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestDuelingAnalyzers.cs new file mode 100644 index 0000000..6155918 --- /dev/null +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestDuelingAnalyzers.cs @@ -0,0 +1,302 @@ +using System; + +namespace org.apache.lucene.analysis.core +{ + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + 
* contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute; + using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute; + using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; + using LuceneTestCase = org.apache.lucene.util.LuceneTestCase; + using TestUtil = org.apache.lucene.util.TestUtil; + using Automaton = org.apache.lucene.util.automaton.Automaton; + using BasicOperations = org.apache.lucene.util.automaton.BasicOperations; + using CharacterRunAutomaton = org.apache.lucene.util.automaton.CharacterRunAutomaton; + using State = org.apache.lucene.util.automaton.State; + using Transition = org.apache.lucene.util.automaton.Transition; + + /// + /// Compares MockTokenizer (which is simple with no optimizations) with equivalent + /// core tokenizers (that have optimizations like buffering). + /// + /// Any tests here need to probably consider unicode version of the JRE (it could + /// cause false fails). 
+ /// + public class TestDuelingAnalyzers : LuceneTestCase + { + private CharacterRunAutomaton jvmLetter; + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: @Override public void setUp() throws Exception + public override void setUp() + { + base.setUp(); + // build an automaton matching this jvm's letter definition + State initial = new State(); + State accept = new State(); + accept.Accept = true; + for (int i = 0; i <= 0x10FFFF; i++) + { + if (char.IsLetter(i)) + { + initial.addTransition(new Transition(i, i, accept)); + } + } + Automaton single = new Automaton(initial); + single.reduce(); + Automaton repeat = BasicOperations.repeat(single); + jvmLetter = new CharacterRunAutomaton(repeat); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testLetterAscii() throws Exception + public virtual void testLetterAscii() + { + Random random = random(); + Analyzer left = new MockAnalyzer(random, jvmLetter, false); + Analyzer right = new AnalyzerAnonymousInnerClassHelper(this); + for (int i = 0; i < 1000; i++) + { + string s = TestUtil.randomSimpleString(random); + assertEquals(s, left.tokenStream("foo", newStringReader(s)), right.tokenStream("foo", newStringReader(s))); + } + } + + private class AnalyzerAnonymousInnerClassHelper : Analyzer + { + private readonly TestDuelingAnalyzers outerInstance; + + public AnalyzerAnonymousInnerClassHelper(TestDuelingAnalyzers outerInstance) + { + this.outerInstance = outerInstance; + } + + protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader) + { + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, reader); + return new TokenStreamComponents(tokenizer, tokenizer); + } + } + + // not so useful since its all one token?! 
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testLetterAsciiHuge() throws Exception + public virtual void testLetterAsciiHuge() + { + Random random = random(); + int maxLength = 8192; // CharTokenizer.IO_BUFFER_SIZE*2 + MockAnalyzer left = new MockAnalyzer(random, jvmLetter, false); + left.MaxTokenLength = 255; // match CharTokenizer's max token length + Analyzer right = new AnalyzerAnonymousInnerClassHelper2(this); + int numIterations = atLeast(50); + for (int i = 0; i < numIterations; i++) + { + string s = TestUtil.randomSimpleString(random, maxLength); + assertEquals(s, left.tokenStream("foo", newStringReader(s)), right.tokenStream("foo", newStringReader(s))); + } + } + + private class AnalyzerAnonymousInnerClassHelper2 : Analyzer + { + private readonly TestDuelingAnalyzers outerInstance; + + public AnalyzerAnonymousInnerClassHelper2(TestDuelingAnalyzers outerInstance) + { + this.outerInstance = outerInstance; + } + + protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader) + { + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, reader); + return new TokenStreamComponents(tokenizer, tokenizer); + } + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testLetterHtmlish() throws Exception + public virtual void testLetterHtmlish() + { + Random random = random(); + Analyzer left = new MockAnalyzer(random, jvmLetter, false); + Analyzer right = new AnalyzerAnonymousInnerClassHelper3(this); + for (int i = 0; i < 1000; i++) + { + string s = TestUtil.randomHtmlishString(random, 20); + assertEquals(s, left.tokenStream("foo", newStringReader(s)), right.tokenStream("foo", newStringReader(s))); + } + } + + private class AnalyzerAnonymousInnerClassHelper3 : Analyzer + { + private readonly TestDuelingAnalyzers outerInstance; + + public 
AnalyzerAnonymousInnerClassHelper3(TestDuelingAnalyzers outerInstance) + { + this.outerInstance = outerInstance; + } + + protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader) + { + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, reader); + return new TokenStreamComponents(tokenizer, tokenizer); + } + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testLetterHtmlishHuge() throws Exception + public virtual void testLetterHtmlishHuge() + { + Random random = random(); + int maxLength = 1024; // this is number of elements, not chars! + MockAnalyzer left = new MockAnalyzer(random, jvmLetter, false); + left.MaxTokenLength = 255; // match CharTokenizer's max token length + Analyzer right = new AnalyzerAnonymousInnerClassHelper4(this); + int numIterations = atLeast(50); + for (int i = 0; i < numIterations; i++) + { + string s = TestUtil.randomHtmlishString(random, maxLength); + assertEquals(s, left.tokenStream("foo", newStringReader(s)), right.tokenStream("foo", newStringReader(s))); + } + } + + private class AnalyzerAnonymousInnerClassHelper4 : Analyzer + { + private readonly TestDuelingAnalyzers outerInstance; + + public AnalyzerAnonymousInnerClassHelper4(TestDuelingAnalyzers outerInstance) + { + this.outerInstance = outerInstance; + } + + protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader) + { + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, reader); + return new TokenStreamComponents(tokenizer, tokenizer); + } + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testLetterUnicode() throws Exception + public virtual void testLetterUnicode() + { + Random random = random(); + Analyzer left = new MockAnalyzer(random(), jvmLetter, false); + Analyzer right = new AnalyzerAnonymousInnerClassHelper5(this); + for (int i 
= 0; i < 1000; i++) + { + string s = TestUtil.randomUnicodeString(random); + assertEquals(s, left.tokenStream("foo", newStringReader(s)), right.tokenStream("foo", newStringReader(s))); + } + } + + private class AnalyzerAnonymousInnerClassHelper5 : Analyzer + { + private readonly TestDuelingAnalyzers outerInstance; + + public AnalyzerAnonymousInnerClassHelper5(TestDuelingAnalyzers outerInstance) + { + this.outerInstance = outerInstance; + } + + protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader) + { + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, reader); + return new TokenStreamComponents(tokenizer, tokenizer); + } + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testLetterUnicodeHuge() throws Exception + public virtual void testLetterUnicodeHuge() + { + Random random = random(); + int maxLength = 4300; // CharTokenizer.IO_BUFFER_SIZE + fudge + MockAnalyzer left = new MockAnalyzer(random, jvmLetter, false); + left.MaxTokenLength = 255; // match CharTokenizer's max token length + Analyzer right = new AnalyzerAnonymousInnerClassHelper6(this); + int numIterations = atLeast(50); + for (int i = 0; i < numIterations; i++) + { + string s = TestUtil.randomUnicodeString(random, maxLength); + assertEquals(s, left.tokenStream("foo", newStringReader(s)), right.tokenStream("foo", newStringReader(s))); + } + } + + private class AnalyzerAnonymousInnerClassHelper6 : Analyzer + { + private readonly TestDuelingAnalyzers outerInstance; + + public AnalyzerAnonymousInnerClassHelper6(TestDuelingAnalyzers outerInstance) + { + this.outerInstance = outerInstance; + } + + protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader) + { + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, reader); + return new TokenStreamComponents(tokenizer, tokenizer); + } + } + + // we only check a few core attributes 
here. + // TODO: test other things +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void assertEquals(String s, org.apache.lucene.analysis.TokenStream left, org.apache.lucene.analysis.TokenStream right) throws Exception + public virtual void assertEquals(string s, TokenStream left, TokenStream right) + { + left.reset(); + right.reset(); + CharTermAttribute leftTerm = left.addAttribute(typeof(CharTermAttribute)); + CharTermAttribute rightTerm = right.addAttribute(typeof(CharTermAttribute)); + OffsetAttribute leftOffset = left.addAttribute(typeof(OffsetAttribute)); + OffsetAttribute rightOffset = right.addAttribute(typeof(OffsetAttribute)); + PositionIncrementAttribute leftPos = left.addAttribute(typeof(PositionIncrementAttribute)); + PositionIncrementAttribute rightPos = right.addAttribute(typeof(PositionIncrementAttribute)); + + while (left.incrementToken()) + { + assertTrue("wrong number of tokens for input: " + s, right.incrementToken()); + assertEquals("wrong term text for input: " + s, leftTerm.ToString(), rightTerm.ToString()); + assertEquals("wrong position for input: " + s, leftPos.PositionIncrement, rightPos.PositionIncrement); + assertEquals("wrong start offset for input: " + s, leftOffset.startOffset(), rightOffset.startOffset()); + assertEquals("wrong end offset for input: " + s, leftOffset.endOffset(), rightOffset.endOffset()); + }; + assertFalse("wrong number of tokens for input: " + s, right.incrementToken()); + left.end(); + right.end(); + assertEquals("wrong final offset for input: " + s, leftOffset.endOffset(), rightOffset.endOffset()); + left.close(); + right.close(); + } + + // TODO: maybe push this out to TestUtil or LuceneTestCase and always use it instead? 
+ private static Reader newStringReader(string s) + { + Random random = random(); + Reader r = new StringReader(s); + if (random.nextBoolean()) + { + r = new MockReaderWrapper(random, r); + } + return r; + } + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestFactories.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestFactories.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestFactories.cs new file mode 100644 index 0000000..8af7962 --- /dev/null +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestFactories.cs @@ -0,0 +1,263 @@ +using System; +using System.Diagnostics; +using System.Collections; +using System.Collections.Generic; + +namespace org.apache.lucene.analysis.core +{ + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + + using AbstractAnalysisFactory = org.apache.lucene.analysis.util.AbstractAnalysisFactory; + using CharFilterFactory = org.apache.lucene.analysis.util.CharFilterFactory; + using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent; + using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware; + using StringMockResourceLoader = org.apache.lucene.analysis.util.StringMockResourceLoader; + using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory; + using TokenizerFactory = org.apache.lucene.analysis.util.TokenizerFactory; + using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory; + + /// + /// Sanity check some things about all factories, + /// we do our best to see if we can sanely initialize it with + /// no parameters and smoke test it, etc. + /// + // TODO: move this, TestRandomChains, and TestAllAnalyzersHaveFactories + // to an integration test module that sucks in all analysis modules. 
+ // currently the only way to do this is via eclipse etc (LUCENE-3974) + public class TestFactories : BaseTokenStreamTestCase + { +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void test() throws java.io.IOException + public virtual void test() + { + foreach (string tokenizer in TokenizerFactory.availableTokenizers()) + { + doTestTokenizer(tokenizer); + } + + foreach (string tokenFilter in TokenFilterFactory.availableTokenFilters()) + { + doTestTokenFilter(tokenFilter); + } + + foreach (string charFilter in CharFilterFactory.availableCharFilters()) + { + doTestCharFilter(charFilter); + } + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: private void doTestTokenizer(String tokenizer) throws java.io.IOException + private void doTestTokenizer(string tokenizer) + { +//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET: +//ORIGINAL LINE: Class factoryClazz = org.apache.lucene.analysis.util.TokenizerFactory.lookupClass(tokenizer); + Type factoryClazz = TokenizerFactory.lookupClass(tokenizer); + TokenizerFactory factory = (TokenizerFactory) initialize(factoryClazz); + if (factory != null) + { + // we managed to fully create an instance. check a few more things: + + // if it implements MultiTermAware, sanity check its impl + if (factory is MultiTermAwareComponent) + { + AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).MultiTermComponent; + assertNotNull(mtc); + // its not ok to return e.g. 
a charfilter here: but a tokenizer could wrap a filter around it + assertFalse(mtc is CharFilterFactory); + } + + // beast it just a little, it shouldnt throw exceptions: + // (it should have thrown them in initialize) + checkRandomData(random(), new FactoryAnalyzer(factory, null, null), 100, 20, false, false); + } + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: private void doTestTokenFilter(String tokenfilter) throws java.io.IOException + private void doTestTokenFilter(string tokenfilter) + { +//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET: +//ORIGINAL LINE: Class factoryClazz = org.apache.lucene.analysis.util.TokenFilterFactory.lookupClass(tokenfilter); + Type factoryClazz = TokenFilterFactory.lookupClass(tokenfilter); + TokenFilterFactory factory = (TokenFilterFactory) initialize(factoryClazz); + if (factory != null) + { + // we managed to fully create an instance. check a few more things: + + // if it implements MultiTermAware, sanity check its impl + if (factory is MultiTermAwareComponent) + { + AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).MultiTermComponent; + assertNotNull(mtc); + // its not ok to return a charfilter or tokenizer here, this makes no sense + assertTrue(mtc is TokenFilterFactory); + } + + // beast it just a little, it shouldnt throw exceptions: + // (it should have thrown them in initialize) + checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, factory, null), 100, 20, false, false); + } + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: private void doTestCharFilter(String charfilter) throws java.io.IOException + private void doTestCharFilter(string charfilter) + { +//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET: +//ORIGINAL LINE: Class factoryClazz = 
org.apache.lucene.analysis.util.CharFilterFactory.lookupClass(charfilter); + Type factoryClazz = CharFilterFactory.lookupClass(charfilter); + CharFilterFactory factory = (CharFilterFactory) initialize(factoryClazz); + if (factory != null) + { + // we managed to fully create an instance. check a few more things: + + // if it implements MultiTermAware, sanity check its impl + if (factory is MultiTermAwareComponent) + { + AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).MultiTermComponent; + assertNotNull(mtc); + // its not ok to return a tokenizer or tokenfilter here, this makes no sense + assertTrue(mtc is CharFilterFactory); + } + + // beast it just a little, it shouldnt throw exceptions: + // (it should have thrown them in initialize) + checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, null, factory), 100, 20, false, false); + } + } + + /// + /// tries to initialize a factory with no arguments +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: private org.apache.lucene.analysis.util.AbstractAnalysisFactory initialize(Class factoryClazz) throws java.io.IOException + private AbstractAnalysisFactory initialize(Type factoryClazz) where T1 : org.apache.lucene.analysis.util.AbstractAnalysisFactory + { + IDictionary args = new Dictionary(); + args["luceneMatchVersion"] = TEST_VERSION_CURRENT.ToString(); +//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET: +//ORIGINAL LINE: Constructor ctor; + Constructor ctor; + try + { + ctor = factoryClazz.GetConstructor(typeof(IDictionary)); + } + catch (Exception) + { + throw new Exception("factory '" + factoryClazz + "' does not have a proper ctor!"); + } + + AbstractAnalysisFactory factory = null; + try + { + factory = ctor.newInstance(args); + } + catch (InstantiationException e) + { + throw new Exception(e); + } + catch (IllegalAccessException e) + { + throw new Exception(e); + } + catch (InvocationTargetException 
e) + { + if (e.InnerException is System.ArgumentException) + { + // its ok if we dont provide the right parameters to throw this + return null; + } + } + + if (factory is ResourceLoaderAware) + { + try + { + ((ResourceLoaderAware) factory).inform(new StringMockResourceLoader("")); + } + catch (IOException) + { + // its ok if the right files arent available or whatever to throw this + } + catch (System.ArgumentException) + { + // is this ok? I guess so + } + } + return factory; + } + + // some silly classes just so we can use checkRandomData + private TokenizerFactory assertingTokenizer = new TokenizerFactoryAnonymousInnerClassHelper(new Dictionary()); + + private class TokenizerFactoryAnonymousInnerClassHelper : TokenizerFactory + { + public TokenizerFactoryAnonymousInnerClassHelper(Dictionary java) : base(Hashtable) + { + } + + public override MockTokenizer create(AttributeFactory factory, Reader input) + { + return new MockTokenizer(factory, input); + } + } + + private class FactoryAnalyzer : Analyzer + { + internal readonly TokenizerFactory tokenizer; + internal readonly CharFilterFactory charFilter; + internal readonly TokenFilterFactory tokenfilter; + + internal FactoryAnalyzer(TokenizerFactory tokenizer, TokenFilterFactory tokenfilter, CharFilterFactory charFilter) + { + Debug.Assert(tokenizer != null); + this.tokenizer = tokenizer; + this.charFilter = charFilter; + this.tokenfilter = tokenfilter; + } + + protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader) + { + Tokenizer tf = tokenizer.create(reader); + if (tokenfilter != null) + { + return new TokenStreamComponents(tf, tokenfilter.create(tf)); + } + else + { + return new TokenStreamComponents(tf); + } + } + + protected internal override Reader initReader(string fieldName, Reader reader) + { + if (charFilter != null) + { + return charFilter.create(reader); + } + else + { + return reader; + } + } + } + } + +} \ No newline at end of file 
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestKeywordAnalyzer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestKeywordAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestKeywordAnalyzer.cs new file mode 100644 index 0000000..ecde6df --- /dev/null +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestKeywordAnalyzer.cs @@ -0,0 +1,143 @@ +namespace org.apache.lucene.analysis.core +{ + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute; + using Document = org.apache.lucene.document.Document; + using Field = org.apache.lucene.document.Field; + using StringField = org.apache.lucene.document.StringField; + using TextField = org.apache.lucene.document.TextField; + using DirectoryReader = org.apache.lucene.index.DirectoryReader; + using DocsEnum = org.apache.lucene.index.DocsEnum; + using IndexReader = org.apache.lucene.index.IndexReader; + using IndexWriter = org.apache.lucene.index.IndexWriter; + using IndexWriterConfig = org.apache.lucene.index.IndexWriterConfig; + using MultiFields = org.apache.lucene.index.MultiFields; + using DocIdSetIterator = org.apache.lucene.search.DocIdSetIterator; + using IndexSearcher = org.apache.lucene.search.IndexSearcher; + using Directory = org.apache.lucene.store.Directory; + using RAMDirectory = org.apache.lucene.store.RAMDirectory; + using BytesRef = org.apache.lucene.util.BytesRef; + using IOUtils = org.apache.lucene.util.IOUtils; + using TestUtil = org.apache.lucene.util.TestUtil; + + public class TestKeywordAnalyzer : BaseTokenStreamTestCase + { + + private Directory directory; + private IndexSearcher searcher; + private IndexReader reader; + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: @Override public void setUp() throws Exception + public override void setUp() + { + base.setUp(); + directory = newDirectory(); + IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(TEST_VERSION_CURRENT, new SimpleAnalyzer(TEST_VERSION_CURRENT))); + + Document doc = new Document(); + doc.add(new StringField("partnum", "Q36", Field.Store.YES)); + doc.add(new TextField("description", "Illidium Space Modulator", Field.Store.YES)); + writer.addDocument(doc); + + writer.close(); + + reader = DirectoryReader.open(directory); + searcher = newSearcher(reader); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses 
are not available in .NET: +//ORIGINAL LINE: @Override public void tearDown() throws Exception + public override void tearDown() + { + reader.close(); + directory.close(); + base.tearDown(); + } + + /* + public void testPerFieldAnalyzer() throws Exception { + PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(TEST_VERSION_CURRENT)); + analyzer.addAnalyzer("partnum", new KeywordAnalyzer()); + + QueryParser queryParser = new QueryParser(TEST_VERSION_CURRENT, "description", analyzer); + Query query = queryParser.parse("partnum:Q36 AND SPACE"); + + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("Q36 kept as-is", + "+partnum:Q36 +space", query.toString("description")); + assertEquals("doc found!", 1, hits.length); + } + */ + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testMutipleDocument() throws Exception + public virtual void testMutipleDocument() + { + RAMDirectory dir = new RAMDirectory(); + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new KeywordAnalyzer())); + Document doc = new Document(); + doc.add(new TextField("partnum", "Q36", Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new TextField("partnum", "Q37", Field.Store.YES)); + writer.addDocument(doc); + writer.close(); + + IndexReader reader = DirectoryReader.open(dir); + DocsEnum td = TestUtil.docs(random(), reader, "partnum", new BytesRef("Q36"), MultiFields.getLiveDocs(reader), null, 0); + assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); + td = TestUtil.docs(random(), reader, "partnum", new BytesRef("Q37"), MultiFields.getLiveDocs(reader), null, 0); + assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); + } + + // LUCENE-1441 +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testOffsets() throws Exception + public virtual void 
testOffsets() + { + TokenStream stream = (new KeywordAnalyzer()).tokenStream("field", new StringReader("abcd")); + try + { + OffsetAttribute offsetAtt = stream.addAttribute(typeof(OffsetAttribute)); + stream.reset(); + assertTrue(stream.incrementToken()); + assertEquals(0, offsetAtt.startOffset()); + assertEquals(4, offsetAtt.endOffset()); + assertFalse(stream.incrementToken()); + stream.end(); + } + finally + { + IOUtils.closeWhileHandlingException(stream); + } + } + + /// + /// blast some random strings through the analyzer +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void testRandomStrings() throws Exception + public virtual void testRandomStrings() + { + checkRandomData(random(), new KeywordAnalyzer(), 1000 * RANDOM_MULTIPLIER); + } + } + +} \ No newline at end of file