Return-Path: Delivered-To: apmail-incubator-lucene-net-commits-archive@minotaur.apache.org Received: (qmail 45802 invoked from network); 3 Nov 2009 18:07:29 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 3 Nov 2009 18:07:29 -0000 Received: (qmail 74146 invoked by uid 500); 3 Nov 2009 18:07:29 -0000 Delivered-To: apmail-incubator-lucene-net-commits-archive@incubator.apache.org Received: (qmail 74119 invoked by uid 500); 3 Nov 2009 18:07:29 -0000 Mailing-List: contact lucene-net-commits-help@incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: lucene-net-dev@incubator.apache.org Delivered-To: mailing list lucene-net-commits@incubator.apache.org Received: (qmail 74110 invoked by uid 99); 3 Nov 2009 18:07:28 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 03 Nov 2009 18:07:28 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 03 Nov 2009 18:07:13 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 423B423889EA; Tue, 3 Nov 2009 18:06:50 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: svn commit: r832486 [3/29] - in /incubator/lucene.net/trunk/C#/src: ./ Demo/DeleteFiles/ Demo/DemoLib/ Demo/IndexFiles/ Demo/IndexHtml/ Demo/SearchFiles/ Lucene.Net/ Lucene.Net/Analysis/ Lucene.Net/Document/ Lucene.Net/Index/ Lucene.Net/Search/ Lucene.... 
Date: Tue, 03 Nov 2009 18:06:38 -0000 To: lucene-net-commits@incubator.apache.org From: aroush@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20091103180650.423B423889EA@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Added: incubator/lucene.net/trunk/C#/src/Test/Analysis/TestASCIIFoldingFilter.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/Analysis/TestASCIIFoldingFilter.cs?rev=832486&view=auto ============================================================================== --- incubator/lucene.net/trunk/C#/src/Test/Analysis/TestASCIIFoldingFilter.cs (added) +++ incubator/lucene.net/trunk/C#/src/Test/Analysis/TestASCIIFoldingFilter.cs Tue Nov 3 18:06:27 2009 @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using NUnit.Framework; + +using TermAttribute = Lucene.Net.Analysis.Tokenattributes.TermAttribute; + +namespace Lucene.Net.Analysis +{ + + [TestFixture] + public class TestASCIIFoldingFilter:BaseTokenStreamTestCase + { + + // testLain1Accents() is a copy of TestLatin1AccentFilter.testU(). 
+ [Test] + public virtual void testLatin1Accents() + { + TokenStream stream = new WhitespaceTokenizer(new System.IO.StringReader("Des mot clés À LA CHAÃŽNE À �? Â Ã Ä Ã… Æ Ç È É Ê Ë ÃŒ �? ÃŽ �? IJ �? Ñ" + " Ã’ Ó Ô Õ Ö Ø Å’ Þ Ù Ú Û Ãœ �? Ÿ à á â ã ä Ã¥ æ ç è é ê ë ì í î ï ij" + " ð ñ ò ó ô õ ö ø Å“ ß þ ù ú û ü ý ÿ �? fl")); + ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream); + + TermAttribute termAtt = (TermAttribute) filter.GetAttribute(typeof(TermAttribute)); + + AssertTermEquals("Des", filter, termAtt); + AssertTermEquals("mot", filter, termAtt); + AssertTermEquals("cles", filter, termAtt); + AssertTermEquals("A", filter, termAtt); + AssertTermEquals("LA", filter, termAtt); + AssertTermEquals("CHAINE", filter, termAtt); + AssertTermEquals("A", filter, termAtt); + AssertTermEquals("A", filter, termAtt); + AssertTermEquals("A", filter, termAtt); + AssertTermEquals("A", filter, termAtt); + AssertTermEquals("A", filter, termAtt); + AssertTermEquals("A", filter, termAtt); + AssertTermEquals("AE", filter, termAtt); + AssertTermEquals("C", filter, termAtt); + AssertTermEquals("E", filter, termAtt); + AssertTermEquals("E", filter, termAtt); + AssertTermEquals("E", filter, termAtt); + AssertTermEquals("E", filter, termAtt); + AssertTermEquals("I", filter, termAtt); + AssertTermEquals("I", filter, termAtt); + AssertTermEquals("I", filter, termAtt); + AssertTermEquals("I", filter, termAtt); + AssertTermEquals("IJ", filter, termAtt); + AssertTermEquals("D", filter, termAtt); + AssertTermEquals("N", filter, termAtt); + AssertTermEquals("O", filter, termAtt); + AssertTermEquals("O", filter, termAtt); + AssertTermEquals("O", filter, termAtt); + AssertTermEquals("O", filter, termAtt); + AssertTermEquals("O", filter, termAtt); + AssertTermEquals("O", filter, termAtt); + AssertTermEquals("OE", filter, termAtt); + AssertTermEquals("TH", filter, termAtt); + AssertTermEquals("U", filter, termAtt); + AssertTermEquals("U", filter, termAtt); + AssertTermEquals("U", filter, 
termAtt); + AssertTermEquals("U", filter, termAtt); + AssertTermEquals("Y", filter, termAtt); + AssertTermEquals("Y", filter, termAtt); + AssertTermEquals("a", filter, termAtt); + AssertTermEquals("a", filter, termAtt); + AssertTermEquals("a", filter, termAtt); + AssertTermEquals("a", filter, termAtt); + AssertTermEquals("a", filter, termAtt); + AssertTermEquals("a", filter, termAtt); + AssertTermEquals("ae", filter, termAtt); + AssertTermEquals("c", filter, termAtt); + AssertTermEquals("e", filter, termAtt); + AssertTermEquals("e", filter, termAtt); + AssertTermEquals("e", filter, termAtt); + AssertTermEquals("e", filter, termAtt); + AssertTermEquals("i", filter, termAtt); + AssertTermEquals("i", filter, termAtt); + AssertTermEquals("i", filter, termAtt); + AssertTermEquals("i", filter, termAtt); + AssertTermEquals("ij", filter, termAtt); + AssertTermEquals("d", filter, termAtt); + AssertTermEquals("n", filter, termAtt); + AssertTermEquals("o", filter, termAtt); + AssertTermEquals("o", filter, termAtt); + AssertTermEquals("o", filter, termAtt); + AssertTermEquals("o", filter, termAtt); + AssertTermEquals("o", filter, termAtt); + AssertTermEquals("o", filter, termAtt); + AssertTermEquals("oe", filter, termAtt); + AssertTermEquals("ss", filter, termAtt); + AssertTermEquals("th", filter, termAtt); + AssertTermEquals("u", filter, termAtt); + AssertTermEquals("u", filter, termAtt); + AssertTermEquals("u", filter, termAtt); + AssertTermEquals("u", filter, termAtt); + AssertTermEquals("y", filter, termAtt); + AssertTermEquals("y", filter, termAtt); + AssertTermEquals("fi", filter, termAtt); + AssertTermEquals("fl", filter, termAtt); + Assert.IsFalse(filter.IncrementToken()); + } + + + // The following Perl script generated the foldings[] array automatically + // from ASCIIFoldingFilter.java: + // + // ============== begin get.test.cases.pl ============== + // + // use strict; + // use warnings; + // + // my $file = "ASCIIFoldingFilter.java"; + // my $output = 
"testcases.txt"; + // my %codes = (); + // my $folded = ''; + // + // open IN, "<:utf8", $file || die "Error opening input file '$file': $!"; + // open OUT, ">:utf8", $output || die "Error opening output file '$output': $!"; + // + // while (my $line = ) { + // chomp($line); + // # case '\u0133': // [ description ] + // if ($line =~ /case\s+'\\u(....)':.*\[([^\]]+)\]/) { + // my $code = $1; + // my $desc = $2; + // $codes{$code} = $desc; + // } + // # output[outputPos++] = 'A'; + // elsif ($line =~ /output\[outputPos\+\+\] = '(.+)';/) { + // my $output_char = $1; + // $folded .= $output_char; + // } + // elsif ($line =~ /break;/ && length($folded) > 0) { + // my $first = 1; + // for my $code (sort { hex($a) <=> hex($b) } keys %codes) { + // my $desc = $codes{$code}; + // print OUT ' '; + // print OUT '+ ' if (not $first); + // $first = 0; + // print OUT '"', chr(hex($code)), qq!" // U+$code: $desc\n!; + // } + // print OUT qq! ,"$folded", // Folded result\n\n!; + // %codes = (); + // $folded = ''; + // } + // } + // close OUT; + // + // ============== end get.test.cases.pl ============== + // + [Test] + public virtual void testAllFoldings() + { + // Alternating strings of: + // 1. All non-ASCII characters to be folded, concatenated together as a + // single string. + // 2. The string of ASCII characters to which each of the above + // characters should be folded. + System.String[] foldings = new System.String[]{"À" + "�?" + "Â" + "Ã" + "Ä" + "Ã…" + "Ä€" + "Ä‚" + "Ä„" + "�?" + "�?" + "Çž" + "Ç " + "Ǻ" + "È€" + "È‚" + "Ȧ" + "Ⱥ" + "á´€" + "Ḁ" + "Ạ" + "Ả" + "Ấ" + "Ầ" + "Ẩ" + "Ẫ" + "Ậ" + "Ắ" + "Ằ" + "Ẳ" + "Ẵ" + "Ặ" + "â’¶" + "A", "A", "à" + "á" + "â" + "ã" + "ä" + "Ã¥" + "�?" + "ă" + "Ä…" + "ÇŽ" + "ÇŸ" + "Ç¡" + "Ç»" + "�?" + "ȃ" + "ȧ" + "�?" + "É™" + "Éš" + "�?" + "�?" + "ᶕ" + "ẚ" + "ạ" + "ả" + "ấ" + " ầ" + "ẩ" + "ẫ" + "ậ" + "ắ" + "ằ" + "ẳ" + "ẵ" + "ặ" + "�?" + "â‚”" + "�?" 
+ "â±¥" + "Ɐ" + "�?", "a", "Ꜳ", "AA", "Æ" + "Ç¢" + "Ǽ" + "�?", "AE", "Ꜵ", "AO", "Ꜷ", "AU", "Ꜹ" + "Ꜻ", "AV", "Ꜽ", "AY", "â’œ", "(a)", "ꜳ", "aa", "æ" + "Ç£" + "ǽ" + "á´‚", "ae", "ꜵ", "ao", "ꜷ", "au", "ꜹ" + "ꜻ", "av", "ꜽ", "ay", "�?" + "Æ‚" + "Ƀ" + "Ê™" + "á´ƒ" + "Ḃ" + "Ḅ" + "Ḇ" + "â’·" + "ï¼¢", "B", "Æ€" + "ƃ" + "É“" + "ᵬ" + "ᶀ" + "ḃ" + "ḅ" + "ḇ" + "↜‘" + "b", "b", "�?", "(b)", "Ç" + "Ć" + "Ĉ" + "ÄŠ" + "ÄŒ" + "Ƈ" + "È»" + "Ê—" + "á´„" + "Ḉ" + "â’¸" + "ï¼£", "C", "ç" + "ć" + "ĉ" + "Ä‹" + "�?" + "ƈ" + "ȼ" + "É•" + "ḉ" + "ↄ" + "â“’" + "Ꜿ" + "ꜿ" + "c", "c", "â’ž", "(c)", "�?" + "ÄŽ" + "�?" + "Ɖ" + "ÆŠ" + "Æ‹" + "á´…" + "á´†" + "Ḋ" + "Ḍ" + "Ḏ" + "�?" + "Ḓ" + "â’¹" + "�?�" + "D", "D", "ð" + "�?" + "Ä‘" + "ÆŒ" + "È¡" + "É–" + "É—" + "áµ­" + "�?" + "ᶑ" + "ḋ" + "�?" + "à ¯Â¿Â½?" + "ḑ" + "ḓ" + "â““" + "�?�" + "d", "d", "Ç„" + "DZ", "DZ", "Ç…" + "Dz", "Dz", "â’Ÿ", "(d)", "ȸ", "db", "dž" + "dz" + "Ê£" + "Ê¥", "dz", "È" + "É" + "Ê" + "Ë" + "Ä’" + "Ä”" + "Ä–" + "Ę" + "Äš" + "ÆŽ" + "�?" + "È„" + "Ȇ" + "Ȩ" + "Ɇ" + "á´‡" + "Ḕ" + "Ḗ" + "Ḙ" + "Ḛ" + "Ḝ" + "Ẹ" + "Ẻ" + "Ẽ" + "Ế" + "Ề" + "Ể" + "Ễ" + "Ệ" + "â’º" + "â±»" + "ï¼¥", "E", "è" + "é" + "ê" + "ë" + "Ä“" + "Ä•" + "Ä—" + "Ä™" + "Ä›" + "�?" + "È…" + "Ȇ¡" + "È©" + "ɇ" + "ɘ" + "É›" + "Éœ" + "�?" + "Éž" + "Êš" + + "á´ˆ" + "ᶒ" + "ᶓ" + "ᶔ" + "ḕ" + "ḗ" + "ḙ" + "ḛ" + "�?" + "ẹ" + "ẻ" + "ẽ" + "ế" + "�?" + "ể" + "á»…" + "ệ" + "â‚‘" + "â“”" + "ⱸ" + "ï½…", "e", "â’ ", "(e)", "Æ‘" + "Ḟ" + "â’»" + "ꜰ" + "�?�" + "ꟻ" + "F", "F", "Æ’" + "áµ®" + "ᶂ" + "ḟ" + "ẛ" + "â“•" + "�?�" + "f", "f", "â’¡", "(f)", "ff", "ff", "ffi", "ffi", "ffl", "ffl", "�?", "fi", "fl", "fl", "Äœ" + "Äž" + "Ä " + "Ä¢" + "Æ“" + "Ǥ" + "Ç¥" + "Ǧ" + "ǧ" + "Ç´ " + "É¢" + "Ê›" + "Ḡ" + "â’¼" + "�?�" + "�?�" + "G", "G", "�?" 
+ "ÄŸ" + "Ä¡" + "Ä£" + "ǵ" + "É " + "É¡" + "áµ·" + "áµ¹" + "ᶃ" + "ḡ" + "â“–" + "�?�" + "g", "g", "â’¢", "(g)", "Ĥ" + "Ħ" + "Èž" + "Êœ" + "Ḣ" + "Ḥ" + "Ḧ" + "Ḩ" + "Ḫ" + "â’½" + "Ⱨ" + "â±µ" + "H", "H", "Ä¥" + "ħ" + "ÈŸ" + "É¥" + "ɦ" + "Ê®" + "ʯ" + "ḣ" + "ḥ" + "ḧ" + "ḩ" + "ḫ" + "ẖ" + "â“—" + "ⱨ" + "ⱶ" + "h", "h", "Ƕ", "HV", "â’£", "(h)", "Æ•", "hv", "ÃŒ" + "�?" + " ÃŽ" + "�?" + "Ĩ" + "Ī" + "Ĭ" + "Ä®" + "Ä°" + "Æ–" + "Æ—" + "�?" + "Ȉ" + "ÈŠ" + "ɪ" + "áµ»" + "Ḭ" + "Ḯ" + "Ỉ" + "Ị" + "â’¾" + "ꟾ" + "I", "I", "ì" + "í" + "î" + "ï" + "Ä©" + "Ä«" + "Ä­" + "į" + "ı" + "�?" + "ȉ" + "È‹" + "ɨ" + "á´‰" + "áµ¢" + "áµ¼" + "ᶖ" + "ḭ" + "ḯ" + "ỉ" + "ị" + "�?�" + "ⓘ" + "i", "i", "IJ", "IJ", "â’¤", "(i)", "ij", "ij", "Ä´" + "Ɉ" + "á´Š" + "â’¿" + "J", "J", "ĵ" + "Ç°" + "È·" + "ɉ" + "ÉŸ" + "Ê„" + "�?" + "â“™" + "â� �±Â¼" + "j", "j", "â’¥", "(j)", "Ķ" + "Ƙ" + "Ǩ" + "á´‹" + "Ḱ" + "Ḳ" + "Ḵ" + "â“€" + "Ⱪ" + "�?�" + "�?�" + "�?�" + "K", "K", "Ä·" + "Æ™" + "Ç©" + "Êž" + "ᶄ" + "ḱ" + "ḳ" + "ḵ" + "â“š" + "ⱪ" + "�??" + "�?�" + "�?�" + "k", "k", "â’¦", "(k)", "Ĺ" + "Ä»" + "Ľ" + "Ä¿" + "�?" + "Ƚ" + "ÊŸ" + "á´Œ" + "Ḷ" + "Ḹ" + "Ḻ" + "Ḽ" + "�?" + "â± " + "â±¢" + "�?�" + "�?�" + "Ꞁ" + "L", "L", "ĺ" + "ļ" + "ľ" + "Å€" + "Å‚" + "Æš" + "È´" + "É«" + "ɬ" + "É­" + "ᶅ" + "ḷ" + "ḹ" + "ḻ" + "ḽ" + "â“›" + "ⱡ" + + "�?�" + "�?�" + "�?" + "l", "l", "LJ", "LJ", "Ỻ", "LL", "Lj", "Lj", "â’§", "(l)", "lj", "lj", "á»»", "ll", "ʪ", "ls", "Ê«", "lz", "Æœ" + "�?" + "Ḿ" + "á¹€" + "Ṃ" + "â“‚" + "â±®" + "ꟽ" + "ꟿ" + "ï¼­", "M", "ɯ" + "É°" + "ɱ" + "ᵯ" + "ᶆ" + "ḿ" + "�?" + "ṃ" + "â“œ" + "�?", "m", "â’¨", "(m)", "Ñ" + "Ń" + "Å…" + "Ň" + "ÅŠ" + "�?" + "Ǹ" + "È " + "É´" + "á´Ž" + "Ṅ" + "Ṇ" + "Ṉ" + "Ṋ" + "Ⓝ" + "ï¼®", "N", "ñ" + "Å„" + "ņ" + "ň" + "ʼn" + "Å‹" + "ÆÅ� �" + "ǹ" + "ȵ" + "ɲ" + "ɳ" + "áµ°" + "ᶇ" + "á¹…" + "ṇ" + "ṉ" + "ṋ" + "�?�" + "�?" + "n", "n", "ÇŠ", "NJ", "Ç‹", "Nj", "â’©", "(n)", "ÇŒ", "nj", "Ã’" + "Ó" + "Ô" + "Õ" + "Ö" + "Ø" + "ÅŒ" + "ÅŽ" + "�?" 
+ "Ɔ" + "ÆŸ" + "Æ " + "Ç‘" + "Ǫ" + "Ǭ" + "Ǿ" + "ÈŒ" + "ÈŽ" + "Ȫ" + "Ȭ" + "È®" + "È°" + "�?" + "�?" + "Ṍ" + "Ṏ" + "�?" + "á¹’" + "Ọ" + "Ỏ" + "�?" + "á»’" + "á»”" + "á»–" + "Ộ" + "Ớ" + "Ờ" + "Ở" + "á» " + "Ợ" + "â“„" + "�?� " + "�?�" + "O", "O", "ò" + "ó" + "ô" + "õ" + "ö" + "ø" + "�?" + "�?" + "Å‘" + "Æ¡" + "Ç’" + "Ç«" + "Ç­" + "Ç¿" + "�?" + "�?" + "È«" + "È­" + "ȯ" + "ȱ" + "É”" + "ɵ" + "á´–" + "á´—" + "ᶗ" + "�?" + "�?" + "ṑ" + "ṓ" + "�?" + "�?" + "ố" + "ồ" + "ổ" + "á»—" + "á»™" + "á»›" + "�?" + "ở" + "ỡ" + "ợ" + "â‚’" + "â“ž" + "ⱺ" + "�?�" + "�??" + "�?", "o", "Å’" + "ɶ", "OE", "�?�", "OO", "È¢" + "á´•", "OU", "â’ª", "(o)", "Å“" + "á´”", "oe", "�??", "oo", "È£", "ou", "Ƥ" + "á´˜" + "á¹”" + "á¹–" + "â“…" + "â±£" + "�??" + "�?�" + "�?�" + "ï¼°", "P", "Æ¥" + "áµ±" + "áµ½" + "ᶈ" + "ṕ" + "á¹—" + "â“Ÿ" + "�?�" + "�?�" + "�?�" + "ꟼ" + "�?", "p", "â’«", "(p)", "ÉŠ" + "Ⓠ" + "�?�" + "�?�" + "ï¼±", "Q", "ĸ" + "É‹" + "Ê " + "â“ " + "�?�" + "�?�" + "q", "q", "â’¬", "(q)", "ȹ", "qp", "Å”" + "Å–" + "Ř" + "�?" + "È’" + "ÉŒ" + "Ê€" + "�?" + "á´™" + "á´š" + "Ṙ" + "Ṛ" + "Ṝ" + "Ṟ" + "Ⓡ" + "Ɽ" + "�?�" + "êž‚" + "ï¼²", "R", "Å•" + + "Å—" + "Å™" + "È‘" + "È“" + "�?" + "ɼ" + "ɽ" + "ɾ" + "É¿" + "áµ£" + "áµ²" + "áµ³" + "ᶉ" + "á¹™" + "á¹›" + "�?" + "ṟ" + "â“¡" + "�?�" + "ꞃ" + "ï½’", "r", "â’­", "(r)", "Åš" + "Åœ" + "Åž" + "Å " + "Ș" + "á¹ " + "á¹¢" + "Ṥ" + "Ṧ" + "Ṩ" + "Ⓢ" + "ꜱ" + "êž…" + "ï¼³", "S", "Å›" + "�?" + "ÅŸ" + "Å¡" + "Å¿" + "È™" + "È¿" + "Ê‚" + "áµ´" + "ᶊ" + "ṡ" + "á¹£" + "á¹¥" + "ṧ" + "ṩ" + "ẜ" + "�?" 
+ "â“¢" + "êž„" + "s", "s", "ẞ", "SS", "â’®", "( s)", "ß", "ss", "st", "st", "Å¢" + "Ť" + "Ŧ" + "Ƭ" + "Æ®" + "Èš" + "Ⱦ" + "á´›" + "Ṫ" + "Ṭ" + "á¹®" + "á¹°" + "Ⓣ" + "Ꞇ" + "ï¼´", "T", "Å£" + "Å¥" + "ŧ" + "Æ«" + "Æ­" + "È›" + "ȶ" + "ʇ" + "ʈ" + "áµµ" + "ṫ" + "á¹­" + "ṯ" + "á¹±" + "ẗ" + "â“£" + "ⱦ" + "ï½”", "t", "Þ" + "�?�", "TH", "Ꜩ", "TZ", "â’¯", "(t)", "ʨ", "tc", "þ" + "ᵺ" + "�?�", "th", "ʦ", "ts", "ꜩ", "tz", "Ù" + "Ú" + "Û" + "Ãœ" + "Ũ" + "Ū" + "Ŭ" + "Å®" + "Å°" + "Ų" + "Ư" + "Ç“" + "Ç•" + "Ç—" + "Ç℠¢" + "Ç›" + "È”" + "È–" + "É„" + "á´œ" + "áµ¾" + "á¹²" + "á¹´" + "Ṷ" + "Ṹ" + "Ṻ" + "Ụ" + "Ủ" + "Ứ" + "Ừ" + "Ử" + "á»®" + "á»°" + "â“Š" + "ï¼µ", "U", "ù" + "ú" + "û" + "ü" + "Å©" + "Å«" + "Å­" + "ů" + "ű" + "ų" + "Æ°" + "Ç”" + "Ç–" + "ǘ" + "Çš" + "Çœ" + "È•" + "È—" + "ʉ" + "ᵤ" + "ᶙ" + "á¹³" + "á¹µ" + "á¹·" + "á¹¹" + "á¹»" + "ụ" + "ủ" + "ứ" + "ừ" + "á»­" + "ữ" + "á»±" + "ⓤ" + "u", "u", "â’°", "(u)", "ᵫ", "ue", "Ʋ" + "É…" + "á´ " + "á¹¼" + "á¹¾" + "Ỽ" + "â“‹" + "�?�" + "�?�" + "V", "V", "Ê‹" + "ÊŒ" + "áµ¥" + "ᶌ" + "á¹½" + "ṿ" + "â“¥" + "â±±" + "â±´" + "�?�" + "ï½–", "v", "�?�", "VY", "â’±", "(v)", "�?�", "vy", "Å´" + "Ç·" + "á´¡" + "Ẁ" + "Ẃ" + "Ẅ" + "Ẇ" + "Ẉ" + "â“Œ" + "â±²" + "ï¼·", "W", "ŵ" + "Æ¿" + "�?" + "�?" + "ẃ" + "ẅ" + "ẇ" + "ẉ" + "ẘ" + "ⓦ" + "â±³" + "ï½—", "w", "â’²", "(w)", "Ẋ" + "Ẍ" + "�?" + "X", "X", "�?" + "ẋ" + "� ?" + "â‚“" + "ⓧ" + "x", "x", "â’³", "(x)", "�?" + "Ŷ" + "Ÿ" + "Ƴ" + "Ȳ" + "ÉŽ" + + "�?" + "Ẏ" + "Ỳ" + "á»´" + "Ỷ" + "Ỹ" + "Ỿ" + "â“Ž" + "ï¼¹", "Y", "ý" + "ÿ" + "Å·" + "Æ´" + "ȳ" + "�?" + "ÊŽ" + "�?" + "ẙ" + "ỳ" + "ỵ" + "á»·" + "ỹ" + "ỿ" + "ⓨ" + "ï½™", "y", "â’´", "(y)", "Ź" + "Å»" + "Ž" + "Ƶ" + "Èœ" + "Ȥ" + "á´¢" + "�?" + "Ẓ" + "Ẕ" + "�?" + "Ⱬ" + "�?�" + "Z", "Z", "ź" + "ż" + "ž" + "ƶ" + "�?" + "È¥" + "É€" + "�?" + "Ê‘" + "ᵶ" + "ᶎ" + "ẑ" + "ẓ" + "ẕ" + "â“©" + "ⱬ" + "�?�" + "z", "z", "â’µ", "(z)", "�?�" + "â‚€" + "⓪" + "â“¿" + "�?", "0", "¹" + "�?" 
+ "â‘ " + "⓵" + "�?�" + "➀" + "➊" + "1", "1", "â’ˆ", "1.", "â‘´", "(1)", "²" + "â‚‚" + "â‘¡" + "⓶" + "�?�" + "�?" + "âž‹" + "ï¼’", "2", "â’‰", "2.", "⑵", "(2)", "³" + "₃" + "â‘¢" + "â“·" + "�?�" + "âž‚" + "➌" + "3", "3", "â’Š", "3.", "⑶", "(3)", "�?�" + "â‚„" + "â‘£" + "⓸" + "�?�" + "➃" + "�?" + "ï¼”", "4", "â’‹", "4.", "â‘·" , "(4)", "�?�" + "â‚…" + "⑤" + "⓹" + "�?�" + "âž„" + "➎" + "5", "5", "â’Œ", "5.", "⑸", "(5)", "�?�" + "₆" + "â‘¥" + "⓺" + "�?�" + "âž…" + "�?" + "ï¼–", "6", "�?", "6.", "⑹", "(6)", "�?�" + "₇" + "⑦" + "â“»" + "�?�" + "➆" + "�?" + "ï¼—", "7", "â’Ž", "7.", "⑺", "(7)", "�?�" + "₈" + "⑧" + "⓼" + "�?�" + "➇" + "âž‘" + "8", "8", "�?", "8.", "â‘»", "(8)", "�?�" + "₉" + "⑨" + "Ã� �“½" + "�?�" + "➈" + "âž’" + "ï¼™", "9", "�?", "9.", "⑼", "(9)", "â‘©" + "⓾" + "�?�" + "➉" + "âž“", "10", "â’‘", "10.", "⑽", "(10)", "⑪" + "â“«", "11", "â’’", "11.", "⑾", "(11)", "â‘«" + "⓬", "12", "â’“", "12.", "â‘¿", "(12)", "⑬" + "â“­", "13", "â’”", "13.", "â’€", "(13)", "â‘­" + "â“®", "14", "â’•", "14.", "�?", "(14)", "â‘®" + "⓯", "15", "â’–", "15.", "â’‚", "(15)", "⑯" + "â“°", "16", "â’—", "16.", "â’ƒ", "(16)", "â‘°" + "⓱", "17", "â’˜", "17.", "â’„", "(17)" , "⑱" + "⓲", "18", "â’™", "18.", "â’…", "(18)", "⑲" + "⓳", "19", "â’š", "19.", "â’†", "(19)", "⑳" + "â“´", "20", "â’›", + "20.", "â’‡", "(20)", "«" + "»" + "“" + "�?" + "„" + "″" + "‶" + "�??" + "�?�" + "�?�" + "�?�" + """, "\"", "‘" + "’" + "‚" + "‛" + "′" + "‵" + "‹" + "›" + "�?�" + "�?�" + "'", "'", "�?" + "‑" + "‒" + "–" + "—" + "�?�" + "â‚‹" + "�?", "-", "�?�" + "�?�" + "ï¼»", "[", "�?�" + "�?�" + "ï¼½", "]", "�?�" + "�?" + "�?�" + "�?�" + "( ", "(", "⸨", "((", "�?�" + "â‚Ž" + "�?�" + "�?�" + ")", ")", "⸩", "))", "�?�" + "�?�" + "<", "<", "�?�" + "�?�" + ">", ">", "�?�" + "ï½›", "{", "�?�" + "�?", "}", "�?�" + "â‚Š" + "+", "+", "�?�" + "â‚Œ" + "�?", "=", "�?", "!", "‼", "!!", "�?�", "!?", "#", "#", "$", "$", "�?�" + "ï¼…", "%", "&", "&", "�?�" + "*", "*", ",", ",", ".", ".", "�?�" + "�?", "/", ":", ":", "�??" 
+ "ï¼ ›", ";", "?", "?", "�?�", "??", "�?�", "?!", "ï¼ ", "@", "ï¼¼", "\\", "‸" + "ï¼¾", "^", "_", "_", "�?�" + "~", "~"}; + + // Construct input text and expected output tokens + System.Collections.IList expectedOutputTokens = new System.Collections.ArrayList(); + System.Text.StringBuilder inputText = new System.Text.StringBuilder(); + for (int n = 0; n < foldings.Length; n += 2) + { + if (n > 0) + { + inputText.Append(' '); // Space between tokens + } + inputText.Append(foldings[n]); + + // Construct the expected output token: the ASCII string to fold to, + // duplicated as many times as the number of characters in the input text. + System.Text.StringBuilder expected = new System.Text.StringBuilder(); + int numChars = foldings[n].Length; + for (int m = 0; m < numChars; ++m) + { + expected.Append(foldings[n + 1]); + } + expectedOutputTokens.Add(expected.ToString()); + } + + TokenStream stream = new WhitespaceTokenizer(new System.IO.StringReader(inputText.ToString())); + ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream); + TermAttribute termAtt = (TermAttribute) filter.GetAttribute(typeof(TermAttribute)); + System.Collections.IEnumerator expectedIter = expectedOutputTokens.GetEnumerator(); + while (expectedIter.MoveNext()) + { + ; + AssertTermEquals((System.String) expectedIter.Current, filter, termAtt); + } + Assert.IsFalse(filter.IncrementToken()); + } + + internal virtual void AssertTermEquals(System.String expected, TokenStream stream, TermAttribute termAtt) + { + Assert.IsTrue(stream.IncrementToken()); + Assert.AreEqual(expected, termAtt.Term()); + } + } +} \ No newline at end of file Modified: incubator/lucene.net/trunk/C#/src/Test/Analysis/TestAnalyzers.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/Analysis/TestAnalyzers.cs?rev=832486&r1=832485&r2=832486&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Test/Analysis/TestAnalyzers.cs 
(original) +++ incubator/lucene.net/trunk/C#/src/Test/Analysis/TestAnalyzers.cs Tue Nov 3 18:06:27 2009 @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -19,32 +19,24 @@ using NUnit.Framework; +using StandardAnalyzer = Lucene.Net.Analysis.Standard.StandardAnalyzer; using StandardTokenizer = Lucene.Net.Analysis.Standard.StandardTokenizer; +using PayloadAttribute = Lucene.Net.Analysis.Tokenattributes.PayloadAttribute; +using TermAttribute = Lucene.Net.Analysis.Tokenattributes.TermAttribute; using Payload = Lucene.Net.Index.Payload; -using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; namespace Lucene.Net.Analysis { - [TestFixture] - public class TestAnalyzers : LuceneTestCase + [TestFixture] + public class TestAnalyzers:BaseTokenStreamTestCase { - public virtual void AssertAnalyzesTo(Analyzer a, System.String input, System.String[] output) + public TestAnalyzers(System.String name):base(name) { - TokenStream ts = a.TokenStream("dummy", new System.IO.StringReader(input)); - Token reusableToken = new Token(); - for (int i = 0; i < output.Length; i++) - { - Token nextToken = ts.Next(reusableToken); - Assert.IsNotNull(nextToken); - Assert.AreEqual(nextToken.Term(), output[i]); - } - Assert.IsNull(ts.Next(reusableToken)); - ts.Close(); } - [Test] + [Test] public virtual void TestSimple() { Analyzer a = new SimpleAnalyzer(); @@ -58,7 +50,7 @@ AssertAnalyzesTo(a, "\"QUOTED\" word", new System.String[]{"quoted", "word"}); } - [Test] + [Test] public virtual void TestNull() { Analyzer a = new WhitespaceAnalyzer(); @@ -72,7 +64,7 @@ AssertAnalyzesTo(a, "\"QUOTED\" word", new System.String[]{"\"QUOTED\"", "word"}); } - [Test] + [Test] public virtual void TestStop() { Analyzer a = new StopAnalyzer(); @@ -82,98 +74,92 @@ internal virtual void VerifyPayload(TokenStream ts) { - Token 
reusableToken = new Token(); + PayloadAttribute payloadAtt = (PayloadAttribute) ts.GetAttribute(typeof(PayloadAttribute)); for (byte b = 1; ; b++) { - reusableToken.Clear(); - Token nextToken = ts.Next(reusableToken); - if (nextToken == null) + bool hasNext = ts.IncrementToken(); + if (!hasNext) break; - // System.out.println("id="+System.identityHashCode(nextToken) + " " + nextToken); + // System.out.println("id="+System.identityHashCode(nextToken) + " " + t); // System.out.println("payload=" + (int)nextToken.getPayload().toByteArray()[0]); - Assert.AreEqual(b, nextToken.GetPayload().ToByteArray()[0]); + Assert.AreEqual(b, payloadAtt.GetPayload().ToByteArray()[0]); } } // Make sure old style next() calls result in a new copy of payloads - [Test] + [Test] public virtual void TestPayloadCopy() { System.String s = "how now brown cow"; TokenStream ts; ts = new WhitespaceTokenizer(new System.IO.StringReader(s)); - ts = new BuffTokenFilter(ts); ts = new PayloadSetter(ts); VerifyPayload(ts); ts = new WhitespaceTokenizer(new System.IO.StringReader(s)); ts = new PayloadSetter(ts); - ts = new BuffTokenFilter(ts); VerifyPayload(ts); } - - // LUCENE-1150: Just a compile time test to ensure the - // StandardAnalyzer constants remain publicly accessible - public virtual void _TestStandardConstants() - { - int x = StandardTokenizer.ALPHANUM; - x = StandardTokenizer.APOSTROPHE; - x = StandardTokenizer.ACRONYM; - x = StandardTokenizer.COMPANY; - x = StandardTokenizer.EMAIL; - x = StandardTokenizer.HOST; - x = StandardTokenizer.NUM; - x = StandardTokenizer.CJ; - string[] y = StandardTokenizer.TOKEN_TYPES; - } - } - - class BuffTokenFilter : TokenFilter - { - internal System.Collections.IList lst; - public BuffTokenFilter(TokenStream input) : base(input) - { + // LUCENE-1150: Just a compile time test, to ensure the + // StandardAnalyzer constants remain publicly accessible + public virtual void _testStandardConstants() + { + int x = StandardTokenizer.ALPHANUM; + x = 
StandardTokenizer.APOSTROPHE; + x = StandardTokenizer.ACRONYM; + x = StandardTokenizer.COMPANY; + x = StandardTokenizer.EMAIL; + x = StandardTokenizer.HOST; + x = StandardTokenizer.NUM; + x = StandardTokenizer.CJ; + System.String[] y = StandardTokenizer.TOKEN_TYPES; } - public override Token Next(Token reusableToken) + private class MyStandardAnalyzer:StandardAnalyzer { - if (lst == null) + public override TokenStream TokenStream(System.String field, System.IO.TextReader reader) { - lst = new System.Collections.ArrayList(); - for (Token nextToken = input.Next(reusableToken); nextToken != null; nextToken = input.Next(reusableToken)) - { - lst.Add(nextToken.Clone()); - } + return new WhitespaceAnalyzer().TokenStream(field, reader); } - object tempObject = lst[0]; - lst.RemoveAt(0); - return lst.Count == 0 ? null : (Token) tempObject; + } + + [Test] + public virtual void TestSubclassOverridingOnlyTokenStream() + { + Analyzer a = new MyStandardAnalyzer(); + TokenStream ts = a.ReusableTokenStream("field", new System.IO.StringReader("the")); + // StandardAnalyzer will discard "the" (it's a + // stopword), by my subclass will not: + Assert.IsTrue(ts.IncrementToken()); + Assert.IsFalse(ts.IncrementToken()); } } - class PayloadSetter : TokenFilter + class PayloadSetter:TokenFilter { private void InitBlock() { p = new Payload(data, 0, 1); } - public PayloadSetter(TokenStream input) : base(input) + internal PayloadAttribute payloadAtt; + public PayloadSetter(TokenStream input):base(input) { InitBlock(); + payloadAtt = (PayloadAttribute) AddAttribute(typeof(PayloadAttribute)); } internal byte[] data = new byte[1]; internal Payload p; - public override Token Next(Token reusableToken) + public override bool IncrementToken() { - System.Diagnostics.Debug.Assert(reusableToken != null); - Token nextToken = input.Next(reusableToken); - if (nextToken == null) return null; - nextToken.SetPayload(p); // reuse the payload / byte[] + bool hasNext = input.IncrementToken(); + if (!hasNext) 
+ return false; + payloadAtt.SetPayload(p); // reuse the payload / byte[] data[0]++; - return nextToken; + return true; } } } \ No newline at end of file Modified: incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCachingTokenFilter.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/Analysis/TestCachingTokenFilter.cs?rev=832486&r1=832485&r2=832486&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCachingTokenFilter.cs (original) +++ incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCachingTokenFilter.cs Tue Nov 3 18:06:27 2009 @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -19,6 +19,8 @@ using NUnit.Framework; +using OffsetAttribute = Lucene.Net.Analysis.Tokenattributes.OffsetAttribute; +using TermAttribute = Lucene.Net.Analysis.Tokenattributes.TermAttribute; using Document = Lucene.Net.Documents.Document; using Field = Lucene.Net.Documents.Field; using TermVector = Lucene.Net.Documents.Field.TermVector; @@ -28,14 +30,14 @@ using TermPositions = Lucene.Net.Index.TermPositions; using Directory = Lucene.Net.Store.Directory; using RAMDirectory = Lucene.Net.Store.RAMDirectory; -using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; namespace Lucene.Net.Analysis { - [TestFixture] - public class TestCachingTokenFilter : LuceneTestCase + + [TestFixture] + public class TestCachingTokenFilter:BaseTokenStreamTestCase { - private class AnonymousClassTokenStream : TokenStream + private class AnonymousClassTokenStream:TokenStream { public AnonymousClassTokenStream(TestCachingTokenFilter enclosingInstance) { @@ -44,6 +46,8 @@ private void InitBlock(TestCachingTokenFilter enclosingInstance) { this.enclosingInstance = enclosingInstance; + termAtt = (TermAttribute) 
AddAttribute(typeof(TermAttribute)); + offsetAtt = (OffsetAttribute) AddAttribute(typeof(OffsetAttribute)); } private TestCachingTokenFilter enclosingInstance; public TestCachingTokenFilter Enclosing_Instance @@ -55,23 +59,26 @@ } private int index = 0; + private TermAttribute termAtt; + private OffsetAttribute offsetAtt; - public override Token Next(Token reusableToken) + public override bool IncrementToken() { - System.Diagnostics.Debug.Assert(reusableToken != null); if (index == Enclosing_Instance.tokens.Length) { - return null; + return false; } else { - return reusableToken.Reinit(Enclosing_Instance.tokens[index++], 0, 0); + termAtt.SetTermBuffer(Enclosing_Instance.tokens[index++]); + offsetAtt.SetOffset(0, 0); + return true; } } } private System.String[] tokens = new System.String[]{"term1", "term2", "term3", "term2"}; - [NUnit.Framework.Test] + [Test] public virtual void TestCaching() { Directory dir = new RAMDirectory(); @@ -84,9 +91,9 @@ doc.Add(new Field("preanalyzed", stream, TermVector.NO)); // 1) we consume all tokens twice before we add the doc to the index - CheckTokens(stream); + checkTokens(stream); stream.Reset(); - CheckTokens(stream); + checkTokens(stream); // 2) now add the document to the index and verify if all tokens are indexed // don't reset the stream here, the DocumentWriter should do that implicitly @@ -113,17 +120,19 @@ // 3) reset stream and consume tokens again stream.Reset(); - CheckTokens(stream); + checkTokens(stream); } - private void CheckTokens(TokenStream stream) + private void checkTokens(TokenStream stream) { int count = 0; - Token reusableToken = new Token(); - for (Token nextToken = stream.Next(reusableToken); nextToken != null; nextToken = stream.Next(reusableToken)) + + TermAttribute termAtt = (TermAttribute) stream.GetAttribute(typeof(TermAttribute)); + Assert.IsNotNull(termAtt); + while (stream.IncrementToken()) { Assert.IsTrue(count < tokens.Length); - Assert.AreEqual(tokens[count], nextToken.Term()); + 
Assert.AreEqual(tokens[count], termAtt.Term()); count++; } Modified: incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCharArraySet.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/Analysis/TestCharArraySet.cs?rev=832486&r1=832485&r2=832486&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCharArraySet.cs (original) +++ incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCharArraySet.cs Tue Nov 3 18:06:27 2009 @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -24,21 +24,25 @@ namespace Lucene.Net.Analysis { - [TestFixture] - public class TestCharArraySet : LuceneTestCase + [TestFixture] + public class TestCharArraySet:LuceneTestCase { - [Test] + + internal static readonly System.String[] TEST_STOP_WORDS = new System.String[]{"a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"}; + + + [Test] public virtual void TestRehash() { CharArraySet cas = new CharArraySet(0, true); - for (int i = 0; i < StopAnalyzer.ENGLISH_STOP_WORDS.Length; i++) - cas.Add(StopAnalyzer.ENGLISH_STOP_WORDS[i]); - Assert.AreEqual(StopAnalyzer.ENGLISH_STOP_WORDS.Length, cas.Count); - for (int i = 0; i < StopAnalyzer.ENGLISH_STOP_WORDS.Length; i++) - Assert.IsTrue(cas.Contains(StopAnalyzer.ENGLISH_STOP_WORDS[i])); + for (int i = 0; i < TEST_STOP_WORDS.Length; i++) + cas.Add(TEST_STOP_WORDS[i]); + Assert.AreEqual(TEST_STOP_WORDS.Length, cas.Count); + for (int i = 0; i < TEST_STOP_WORDS.Length; i++) + Assert.IsTrue(cas.Contains(TEST_STOP_WORDS[i])); } - - [Test] + + [Test] public virtual void TestNonZeroOffset() { 
System.String[] words = new System.String[]{"Hello", "World", "this", "is", "a", "test"}; @@ -47,6 +51,185 @@ for (int i = 0; i < words.Length; i++) { set_Renamed.Add(words[i]); } Assert.IsTrue(set_Renamed.Contains(findme, 1, 4)); Assert.IsTrue(set_Renamed.Contains(new System.String(findme, 1, 4))); + + // test unmodifiable + set_Renamed = CharArraySet.UnmodifiableSet(set_Renamed); + Assert.IsTrue(set_Renamed.Contains(findme, 1, 4)); + Assert.IsTrue(set_Renamed.Contains(new System.String(findme, 1, 4))); + } + + [Test] + public virtual void TestObjectContains() + { + CharArraySet set_Renamed = new CharArraySet(10, true); + System.Int32 val = 1; + set_Renamed.Add((System.Object) val); + Assert.IsTrue(set_Renamed.Contains((System.Object) val)); + Assert.IsTrue(set_Renamed.Contains((System.Object) 1)); + // test unmodifiable + set_Renamed = CharArraySet.UnmodifiableSet(set_Renamed); + Assert.IsTrue(set_Renamed.Contains((System.Object) val)); + Assert.IsTrue(set_Renamed.Contains((System.Object) 1)); + } + + [Test] + public virtual void TestClear() + { + CharArraySet set_Renamed = new CharArraySet(10, true); + for (int i = 0; i < TEST_STOP_WORDS.Length; i++) { set_Renamed.Add(TEST_STOP_WORDS[i]); } + Assert.AreEqual(TEST_STOP_WORDS.Length, set_Renamed.Count, "Not all words added"); + try + { + set_Renamed.Clear(); + Assert.Fail("remove is not supported"); + } + catch (System.NotSupportedException e) + { + // expected + Assert.AreEqual(TEST_STOP_WORDS.Length, set_Renamed.Count, "Not all words added"); + } + } + + [Test] + public virtual void TestModifyOnUnmodifiable() + { + CharArraySet set_Renamed = new CharArraySet(10, true); + for (int i = 0; i < TEST_STOP_WORDS.Length; i++) { set_Renamed.Add(TEST_STOP_WORDS[i]); } + int size = set_Renamed.Count; + set_Renamed = CharArraySet.UnmodifiableSet(set_Renamed); + Assert.AreEqual(size, set_Renamed.Count, "Set size changed due to UnmodifiableSet call"); + System.String NOT_IN_SET = "SirGallahad"; + 
Assert.IsFalse(set_Renamed.Contains(NOT_IN_SET), "Test String already exists in set"); + + try + { + set_Renamed.Add(NOT_IN_SET.ToCharArray()); + Assert.Fail("Modified unmodifiable set"); + } + catch (System.NotSupportedException e) + { + // expected + Assert.IsFalse(set_Renamed.Contains(NOT_IN_SET), "Test String has been added to unmodifiable set"); + Assert.AreEqual(size, set_Renamed.Count, "Size of unmodifiable set has changed"); + } + + try + { + set_Renamed.Add(NOT_IN_SET); + Assert.Fail("Modified unmodifiable set"); + } + catch (System.NotSupportedException e) + { + // expected + Assert.IsFalse(set_Renamed.Contains(NOT_IN_SET), "Test String has been added to unmodifiable set"); + Assert.AreEqual(size, set_Renamed.Count, "Size of unmodifiable set has changed"); + } + + try + { + set_Renamed.Add(new System.Text.StringBuilder(NOT_IN_SET)); + Assert.Fail("Modified unmodifiable set"); + } + catch (System.NotSupportedException e) + { + // expected + Assert.IsFalse(set_Renamed.Contains(NOT_IN_SET), "Test String has been added to unmodifiable set"); + Assert.AreEqual(size, set_Renamed.Count, "Size of unmodifiable set has changed"); + } + + try + { + set_Renamed.Clear(); + Assert.Fail("Modified unmodifiable set"); + } + catch (System.NotSupportedException e) + { + // expected + Assert.IsFalse(set_Renamed.Contains(NOT_IN_SET), "Changed unmodifiable set"); + Assert.AreEqual(size, set_Renamed.Count, "Size of unmodifiable set has changed"); + } + try + { + set_Renamed.Add((System.Object) NOT_IN_SET); + Assert.Fail("Modified unmodifiable set"); + } + catch (System.NotSupportedException e) + { + // expected + Assert.IsFalse(set_Renamed.Contains(NOT_IN_SET), "Test String has been added to unmodifiable set"); + Assert.AreEqual(size, set_Renamed.Count, "Size of unmodifiable set has changed"); + } + try + { + for (int i = 0; i < TEST_STOP_WORDS.Length; i++) + { + if (set_Renamed.Contains(TEST_STOP_WORDS[i])) + set_Renamed.Remove(TEST_STOP_WORDS[i]); // {{Aroush-2.9}} this 
should throw + } + Assert.Fail("Modified unmodifiable set"); + } + catch (System.NotSupportedException e) + { + // expected + Assert.AreEqual(size, set_Renamed.Count, "Size of unmodifiable set has changed"); + } + + try + { + for (int i = 0; i < NOT_IN_SET.Length; i++) + { + if (!set_Renamed.Contains(NOT_IN_SET[i])) + set_Renamed.Remove(NOT_IN_SET[i]); // {{Aroush-2.9}} this should throw + } + Assert.Fail("Modified unmodifiable set"); + } + catch (System.NotSupportedException e) + { + // expected + Assert.AreEqual(size, set_Renamed.Count, "Size of unmodifiable set has changed"); + } + + try + { + for (int i = 0; i < NOT_IN_SET.Length; i++) + { + if (!set_Renamed.Contains(NOT_IN_SET[i])) + set_Renamed.Add(NOT_IN_SET[i]); // {{Aroush-2.9}} this should throw + } + Assert.Fail("Modified unmodifiable set"); + } + catch (System.NotSupportedException e) + { + // expected + Assert.IsFalse(set_Renamed.Contains(NOT_IN_SET), "Test String has been added to unmodifiable set"); + } + + for (int i = 0; i < TEST_STOP_WORDS.Length; i++) + { + Assert.IsTrue(set_Renamed.Contains(TEST_STOP_WORDS[i])); + } + } + + [Test] + public virtual void TestUnmodifiableSet() + { + CharArraySet set_Renamed = new CharArraySet(10, true); + for (int i = 0; i < TEST_STOP_WORDS.Length; i++) + if (!set_Renamed.Contains(TEST_STOP_WORDS[i])) + set_Renamed.Add(TEST_STOP_WORDS[i]); + int size = set_Renamed.Count; + set_Renamed = CharArraySet.UnmodifiableSet(set_Renamed); + Assert.AreEqual(size, set_Renamed.Count, "Set size changed due to UnmodifiableSet call"); + + try + { + CharArraySet.UnmodifiableSet(null); + Assert.Fail("can not make null unmodifiable"); + } + catch (System.NullReferenceException e) + { + // expected + } } } } \ No newline at end of file Added: incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCharFilter.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/Analysis/TestCharFilter.cs?rev=832486&view=auto 
============================================================================== --- incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCharFilter.cs (added) +++ incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCharFilter.cs Tue Nov 3 18:06:27 2009 @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; + +using NUnit.Framework; + +using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + +namespace Lucene.Net.Analysis +{ + + [TestFixture] + public class TestCharFilter:LuceneTestCase + { + + [Test] + public virtual void TestCharFilter1() + { + CharStream cs = new CharFilter1(CharReader.Get(new System.IO.StringReader(""))); + Assert.AreEqual(1, cs.CorrectOffset(0), "corrected offset is invalid"); + } + + [Test] + public virtual void TestCharFilter2() + { + CharStream cs = new CharFilter2(CharReader.Get(new System.IO.StringReader(""))); + Assert.AreEqual(2, cs.CorrectOffset(0), "corrected offset is invalid"); + } + + [Test] + public virtual void TestCharFilter12() + { + CharStream cs = new CharFilter2(new CharFilter1(CharReader.Get(new System.IO.StringReader("")))); + Assert.AreEqual(3, cs.CorrectOffset(0), "corrected offset is invalid"); + } + + [Test] + public virtual void TestCharFilter11() + { + CharStream cs = new CharFilter1(new CharFilter1(CharReader.Get(new System.IO.StringReader("")))); + Assert.AreEqual(2, cs.CorrectOffset(0), "corrected offset is invalid"); + } + + internal class CharFilter1:CharFilter + { + + protected internal CharFilter1(CharStream in_Renamed):base(in_Renamed) + { + } + + public /*protected internal*/ override int Correct(int currentOff) + { + return currentOff + 1; + } + } + + internal class CharFilter2:CharFilter + { + + protected internal CharFilter2(CharStream in_Renamed):base(in_Renamed) + { + } + + public /*protected internal*/ override int Correct(int currentOff) + { + return currentOff + 2; + } + } + } +} \ No newline at end of file Added: incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCharacterCache.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/Analysis/TestCharacterCache.cs?rev=832486&view=auto ============================================================================== --- incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCharacterCache.cs (added) +++ 
incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCharacterCache.cs Tue Nov 3 18:06:27 2009 @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using NUnit.Framework; + +using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + +namespace Lucene.Net.Analysis +{ + /// Testcase for {@link CharacterCache} + [TestFixture] + public class TestCharacterCache:LuceneTestCase + { + + [Test] + public virtual void TestValueOf() + { + for (int i = 0; i < 256; i++) + { + System.Char valueOf = CharacterCache.ValueOf((char) i); + Assert.AreEqual((char) i, valueOf); + } + } + } +} \ No newline at end of file Modified: incubator/lucene.net/trunk/C#/src/Test/Analysis/TestISOLatin1AccentFilter.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/Analysis/TestISOLatin1AccentFilter.cs?rev=832486&r1=832485&r2=832486&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Test/Analysis/TestISOLatin1AccentFilter.cs (original) +++ incubator/lucene.net/trunk/C#/src/Test/Analysis/TestISOLatin1AccentFilter.cs Tue Nov 3 18:06:27 2009 @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one 
or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -19,96 +19,102 @@ using NUnit.Framework; -using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; +using TermAttribute = Lucene.Net.Analysis.Tokenattributes.TermAttribute; namespace Lucene.Net.Analysis { - [TestFixture] - public class TestISOLatin1AccentFilter : LuceneTestCase + [TestFixture] + public class TestISOLatin1AccentFilter:BaseTokenStreamTestCase { - [Test] + [Test] public virtual void TestU() { - TokenStream stream = new WhitespaceTokenizer(new System.IO.StringReader("Des mot clés À LA CHAÎNE À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï IJ Ð Ñ Ò Ó Ô Õ Ö Ø Œ Þ Ù Ú Û Ü Ý Ÿ à á â ã ä å æ ç è é ê ë ì í î ï ij ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ fi fl")); + TokenStream stream = new WhitespaceTokenizer(new System.IO.StringReader("Des mot clés À LA CHAÎNE À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï IJ Ð Ñ Ò Ó Ô Õ Ö Ø Œ Þ Ù Ú Û Ü Ý Ÿ à á â ã ä å æ ç è é ê ë ì í î ï ij ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ fi 
fl")); ISOLatin1AccentFilter filter = new ISOLatin1AccentFilter(stream); - Token reusableToken = new Token(); - Assert.AreEqual("Des", filter.Next(reusableToken).Term()); - Assert.AreEqual("mot", filter.Next(reusableToken).Term()); - Assert.AreEqual("cles", filter.Next(reusableToken).Term()); - Assert.AreEqual("A", filter.Next(reusableToken).Term()); - Assert.AreEqual("LA", filter.Next(reusableToken).Term()); - Assert.AreEqual("CHAINE", filter.Next(reusableToken).Term()); - Assert.AreEqual("A", filter.Next(reusableToken).Term()); - Assert.AreEqual("A", filter.Next(reusableToken).Term()); - Assert.AreEqual("A", filter.Next(reusableToken).Term()); - Assert.AreEqual("A", filter.Next(reusableToken).Term()); - Assert.AreEqual("A", filter.Next(reusableToken).Term()); - Assert.AreEqual("A", filter.Next(reusableToken).Term()); - Assert.AreEqual("AE", filter.Next(reusableToken).Term()); - Assert.AreEqual("C", filter.Next(reusableToken).Term()); - Assert.AreEqual("E", filter.Next(reusableToken).Term()); - Assert.AreEqual("E", filter.Next(reusableToken).Term()); - Assert.AreEqual("E", filter.Next(reusableToken).Term()); - Assert.AreEqual("E", filter.Next(reusableToken).Term()); - Assert.AreEqual("I", filter.Next(reusableToken).Term()); - Assert.AreEqual("I", filter.Next(reusableToken).Term()); - Assert.AreEqual("I", filter.Next(reusableToken).Term()); - Assert.AreEqual("I", filter.Next(reusableToken).Term()); - Assert.AreEqual("IJ", filter.Next(reusableToken).Term()); - Assert.AreEqual("D", filter.Next(reusableToken).Term()); - Assert.AreEqual("N", filter.Next(reusableToken).Term()); - Assert.AreEqual("O", filter.Next(reusableToken).Term()); - Assert.AreEqual("O", filter.Next(reusableToken).Term()); - Assert.AreEqual("O", filter.Next(reusableToken).Term()); - Assert.AreEqual("O", filter.Next(reusableToken).Term()); - Assert.AreEqual("O", filter.Next(reusableToken).Term()); - Assert.AreEqual("O", filter.Next(reusableToken).Term()); - Assert.AreEqual("OE", 
filter.Next(reusableToken).Term()); - Assert.AreEqual("TH", filter.Next(reusableToken).Term()); - Assert.AreEqual("U", filter.Next(reusableToken).Term()); - Assert.AreEqual("U", filter.Next(reusableToken).Term()); - Assert.AreEqual("U", filter.Next(reusableToken).Term()); - Assert.AreEqual("U", filter.Next(reusableToken).Term()); - Assert.AreEqual("Y", filter.Next(reusableToken).Term()); - Assert.AreEqual("Y", filter.Next(reusableToken).Term()); - Assert.AreEqual("a", filter.Next(reusableToken).Term()); - Assert.AreEqual("a", filter.Next(reusableToken).Term()); - Assert.AreEqual("a", filter.Next(reusableToken).Term()); - Assert.AreEqual("a", filter.Next(reusableToken).Term()); - Assert.AreEqual("a", filter.Next(reusableToken).Term()); - Assert.AreEqual("a", filter.Next(reusableToken).Term()); - Assert.AreEqual("ae", filter.Next(reusableToken).Term()); - Assert.AreEqual("c", filter.Next(reusableToken).Term()); - Assert.AreEqual("e", filter.Next(reusableToken).Term()); - Assert.AreEqual("e", filter.Next(reusableToken).Term()); - Assert.AreEqual("e", filter.Next(reusableToken).Term()); - Assert.AreEqual("e", filter.Next(reusableToken).Term()); - Assert.AreEqual("i", filter.Next(reusableToken).Term()); - Assert.AreEqual("i", filter.Next(reusableToken).Term()); - Assert.AreEqual("i", filter.Next(reusableToken).Term()); - Assert.AreEqual("i", filter.Next(reusableToken).Term()); - Assert.AreEqual("ij", filter.Next(reusableToken).Term()); - Assert.AreEqual("d", filter.Next(reusableToken).Term()); - Assert.AreEqual("n", filter.Next(reusableToken).Term()); - Assert.AreEqual("o", filter.Next(reusableToken).Term()); - Assert.AreEqual("o", filter.Next(reusableToken).Term()); - Assert.AreEqual("o", filter.Next(reusableToken).Term()); - Assert.AreEqual("o", filter.Next(reusableToken).Term()); - Assert.AreEqual("o", filter.Next(reusableToken).Term()); - Assert.AreEqual("o", filter.Next(reusableToken).Term()); - Assert.AreEqual("oe", filter.Next(reusableToken).Term()); - 
Assert.AreEqual("ss", filter.Next(reusableToken).Term()); - Assert.AreEqual("th", filter.Next(reusableToken).Term()); - Assert.AreEqual("u", filter.Next(reusableToken).Term()); - Assert.AreEqual("u", filter.Next(reusableToken).Term()); - Assert.AreEqual("u", filter.Next(reusableToken).Term()); - Assert.AreEqual("u", filter.Next(reusableToken).Term()); - Assert.AreEqual("y", filter.Next(reusableToken).Term()); - Assert.AreEqual("y", filter.Next(reusableToken).Term()); - Assert.AreEqual("fi", filter.Next(reusableToken).Term()); - Assert.AreEqual("fl", filter.Next(reusableToken).Term()); - Assert.IsNull(filter.Next(reusableToken)); + TermAttribute termAtt = (TermAttribute) filter.GetAttribute(typeof(TermAttribute)); + AssertTermEquals("Des", filter, termAtt); + AssertTermEquals("mot", filter, termAtt); + AssertTermEquals("cles", filter, termAtt); + AssertTermEquals("A", filter, termAtt); + AssertTermEquals("LA", filter, termAtt); + AssertTermEquals("CHAINE", filter, termAtt); + AssertTermEquals("A", filter, termAtt); + AssertTermEquals("A", filter, termAtt); + AssertTermEquals("A", filter, termAtt); + AssertTermEquals("A", filter, termAtt); + AssertTermEquals("A", filter, termAtt); + AssertTermEquals("A", filter, termAtt); + AssertTermEquals("AE", filter, termAtt); + AssertTermEquals("C", filter, termAtt); + AssertTermEquals("E", filter, termAtt); + AssertTermEquals("E", filter, termAtt); + AssertTermEquals("E", filter, termAtt); + AssertTermEquals("E", filter, termAtt); + AssertTermEquals("I", filter, termAtt); + AssertTermEquals("I", filter, termAtt); + AssertTermEquals("I", filter, termAtt); + AssertTermEquals("I", filter, termAtt); + AssertTermEquals("IJ", filter, termAtt); + AssertTermEquals("D", filter, termAtt); + AssertTermEquals("N", filter, termAtt); + AssertTermEquals("O", filter, termAtt); + AssertTermEquals("O", filter, termAtt); + AssertTermEquals("O", filter, termAtt); + AssertTermEquals("O", filter, termAtt); + AssertTermEquals("O", filter, termAtt); + 
AssertTermEquals("O", filter, termAtt); + AssertTermEquals("OE", filter, termAtt); + AssertTermEquals("TH", filter, termAtt); + AssertTermEquals("U", filter, termAtt); + AssertTermEquals("U", filter, termAtt); + AssertTermEquals("U", filter, termAtt); + AssertTermEquals("U", filter, termAtt); + AssertTermEquals("Y", filter, termAtt); + AssertTermEquals("Y", filter, termAtt); + AssertTermEquals("a", filter, termAtt); + AssertTermEquals("a", filter, termAtt); + AssertTermEquals("a", filter, termAtt); + AssertTermEquals("a", filter, termAtt); + AssertTermEquals("a", filter, termAtt); + AssertTermEquals("a", filter, termAtt); + AssertTermEquals("ae", filter, termAtt); + AssertTermEquals("c", filter, termAtt); + AssertTermEquals("e", filter, termAtt); + AssertTermEquals("e", filter, termAtt); + AssertTermEquals("e", filter, termAtt); + AssertTermEquals("e", filter, termAtt); + AssertTermEquals("i", filter, termAtt); + AssertTermEquals("i", filter, termAtt); + AssertTermEquals("i", filter, termAtt); + AssertTermEquals("i", filter, termAtt); + AssertTermEquals("ij", filter, termAtt); + AssertTermEquals("d", filter, termAtt); + AssertTermEquals("n", filter, termAtt); + AssertTermEquals("o", filter, termAtt); + AssertTermEquals("o", filter, termAtt); + AssertTermEquals("o", filter, termAtt); + AssertTermEquals("o", filter, termAtt); + AssertTermEquals("o", filter, termAtt); + AssertTermEquals("o", filter, termAtt); + AssertTermEquals("oe", filter, termAtt); + AssertTermEquals("ss", filter, termAtt); + AssertTermEquals("th", filter, termAtt); + AssertTermEquals("u", filter, termAtt); + AssertTermEquals("u", filter, termAtt); + AssertTermEquals("u", filter, termAtt); + AssertTermEquals("u", filter, termAtt); + AssertTermEquals("y", filter, termAtt); + AssertTermEquals("y", filter, termAtt); + AssertTermEquals("fi", filter, termAtt); + AssertTermEquals("fl", filter, termAtt); + Assert.IsFalse(filter.IncrementToken()); + } + + internal virtual void 
AssertTermEquals(System.String expected, TokenStream stream, TermAttribute termAtt) + { + Assert.IsTrue(stream.IncrementToken()); + Assert.AreEqual(expected, termAtt.Term()); } } } \ No newline at end of file Modified: incubator/lucene.net/trunk/C#/src/Test/Analysis/TestKeywordAnalyzer.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/Analysis/TestKeywordAnalyzer.cs?rev=832486&r1=832485&r2=832486&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Test/Analysis/TestKeywordAnalyzer.cs (original) +++ incubator/lucene.net/trunk/C#/src/Test/Analysis/TestKeywordAnalyzer.cs Tue Nov 3 18:06:27 2009 @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -19,6 +19,7 @@ using NUnit.Framework; +using OffsetAttribute = Lucene.Net.Analysis.Tokenattributes.OffsetAttribute; using Document = Lucene.Net.Documents.Document; using Field = Lucene.Net.Documents.Field; using IndexReader = Lucene.Net.Index.IndexReader; @@ -27,28 +28,27 @@ using TermDocs = Lucene.Net.Index.TermDocs; using QueryParser = Lucene.Net.QueryParsers.QueryParser; using RAMDirectory = Lucene.Net.Store.RAMDirectory; -using ScoreDoc = Lucene.Net.Search.ScoreDoc; using IndexSearcher = Lucene.Net.Search.IndexSearcher; using Query = Lucene.Net.Search.Query; -using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; +using ScoreDoc = Lucene.Net.Search.ScoreDoc; namespace Lucene.Net.Analysis { - [TestFixture] - public class TestKeywordAnalyzer : LuceneTestCase + [TestFixture] + public class TestKeywordAnalyzer:BaseTokenStreamTestCase { private RAMDirectory directory; private IndexSearcher searcher; [SetUp] - public override void SetUp() + public override void SetUp() { base.SetUp(); directory = new RAMDirectory(); IndexWriter writer = new 
IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); -//writer.SetInfoStream(System.Console.Out); + Document doc = new Document(); doc.Add(new Field("partnum", "Q36", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.Add(new Field("description", "Illidium Space Modulator", Field.Store.YES, Field.Index.ANALYZED)); @@ -58,32 +58,14 @@ searcher = new IndexSearcher(directory); } - - //[Test] - //public void TestSameThreadConsecutive() - //{ - // TestMultipleDocument(); - // TestPerFieldAnalyzer(); - //} - - //[Test] - //public void TestDistinctThreadConsecutive() - //{ - // SupportClass.ThreadClass thread1 = new SupportClass.ThreadClass(new System.Threading.ThreadStart(TestMultipleDocument)); - // thread1.Start(); - // System.Threading.Thread.CurrentThread.Join(); - // SupportClass.ThreadClass thread2 = new SupportClass.ThreadClass(new System.Threading.ThreadStart(TestPerFieldAnalyzer)); - // thread2.Start(); - // System.Threading.Thread.CurrentThread.Join(); - //} - + [Test] public virtual void TestPerFieldAnalyzer() { - PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer()); + PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer()); analyzer.AddAnalyzer("partnum", new KeywordAnalyzer()); - - Lucene.Net.QueryParsers.QueryParser queryParser = new Lucene.Net.QueryParsers.QueryParser("description", analyzer); + + QueryParser queryParser = new QueryParser("description", analyzer); Query query = queryParser.Parse("partnum:Q36 AND SPACE"); ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs; @@ -91,8 +73,8 @@ Assert.AreEqual(1, hits.Length, "doc found!"); } - [Test] - public virtual void TestMultipleDocument() + [Test] + public virtual void TestMutipleDocument() { RAMDirectory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new KeywordAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); @@ -104,14 +86,22 @@ writer.AddDocument(doc); writer.Close(); - 
IndexReader reader = IndexReader.Open(dir); - // following is the line whose inclusion causes TestPerFieldAnalyzer to fail: - TermDocs td = reader.TermDocs(new Term("partnum", "Q36")); - Assert.IsTrue(td.Next()); - td = reader.TermDocs(new Term("partnum", "Q37")); - Assert.IsTrue(td.Next()); -//this fixes TestPerFieldAnalyzer: -//((Lucene.Net.Index.SegmentReader)reader).foo(); + IndexReader reader = IndexReader.Open(dir); + TermDocs td = reader.TermDocs(new Term("partnum", "Q36")); + Assert.IsTrue(td.Next()); + td = reader.TermDocs(new Term("partnum", "Q37")); + Assert.IsTrue(td.Next()); + } + + // LUCENE-1441 + [Test] + public virtual void TestOffsets() + { + TokenStream stream = new KeywordAnalyzer().TokenStream("field", new System.IO.StringReader("abcd")); + OffsetAttribute offsetAtt = (OffsetAttribute) stream.AddAttribute(typeof(OffsetAttribute)); + Assert.IsTrue(stream.IncrementToken()); + Assert.AreEqual(0, offsetAtt.StartOffset()); + Assert.AreEqual(4, offsetAtt.EndOffset()); } } } \ No newline at end of file Modified: incubator/lucene.net/trunk/C#/src/Test/Analysis/TestLengthFilter.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/Analysis/TestLengthFilter.cs?rev=832486&r1=832485&r2=832486&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Test/Analysis/TestLengthFilter.cs (original) +++ incubator/lucene.net/trunk/C#/src/Test/Analysis/TestLengthFilter.cs Tue Nov 3 18:06:27 2009 @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. 
@@ -19,24 +19,29 @@ using NUnit.Framework; -using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; +using TermAttribute = Lucene.Net.Analysis.Tokenattributes.TermAttribute; namespace Lucene.Net.Analysis { - [TestFixture] - public class TestLengthFilter : LuceneTestCase + [TestFixture] + public class TestLengthFilter:BaseTokenStreamTestCase { - [Test] + + [Test] public virtual void TestFilter() { TokenStream stream = new WhitespaceTokenizer(new System.IO.StringReader("short toolong evenmuchlongertext a ab toolong foo")); LengthFilter filter = new LengthFilter(stream, 2, 6); - Token reusableToken = new Token(); - Assert.AreEqual("short", filter.Next(reusableToken).Term()); - Assert.AreEqual("ab", filter.Next(reusableToken).Term()); - Assert.AreEqual("foo", filter.Next(reusableToken).Term()); - Assert.IsNull(filter.Next(reusableToken)); + TermAttribute termAtt = (TermAttribute) filter.GetAttribute(typeof(TermAttribute)); + + Assert.IsTrue(filter.IncrementToken()); + Assert.AreEqual("short", termAtt.Term()); + Assert.IsTrue(filter.IncrementToken()); + Assert.AreEqual("ab", termAtt.Term()); + Assert.IsTrue(filter.IncrementToken()); + Assert.AreEqual("foo", termAtt.Term()); + Assert.IsFalse(filter.IncrementToken()); } } } \ No newline at end of file Added: incubator/lucene.net/trunk/C#/src/Test/Analysis/TestMappingCharFilter.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/Analysis/TestMappingCharFilter.cs?rev=832486&view=auto ============================================================================== --- incubator/lucene.net/trunk/C#/src/Test/Analysis/TestMappingCharFilter.cs (added) +++ incubator/lucene.net/trunk/C#/src/Test/Analysis/TestMappingCharFilter.cs Tue Nov 3 18:06:27 2009 @@ -0,0 +1,183 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using NUnit.Framework; + +namespace Lucene.Net.Analysis +{ + + [TestFixture] + public class TestMappingCharFilter:BaseTokenStreamTestCase + { + + internal NormalizeCharMap normMap; + + [SetUp] + public override void SetUp() + { + base.SetUp(); + normMap = new NormalizeCharMap(); + + normMap.Add("aa", "a"); + normMap.Add("bbb", "b"); + normMap.Add("cccc", "cc"); + + normMap.Add("h", "i"); + normMap.Add("j", "jj"); + normMap.Add("k", "kkk"); + normMap.Add("ll", "llll"); + + normMap.Add("empty", ""); + } + + [Test] + public virtual void TestReaderReset() + { + CharStream cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("x"))); + char[] buf = new char[10]; + int len = cs.Read(buf, 0, 10); + Assert.AreEqual(1, len); + Assert.AreEqual('x', buf[0]); + len = cs.Read(buf, 0, 10); + Assert.AreEqual(- 1, len); + + // rewind + cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("x"))); + len = cs.Read(buf, 0, 10); + Assert.AreEqual(1, len); + Assert.AreEqual('x', buf[0]); + } + + [Test] + public virtual void TestNothingChange() + { + CharStream cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("x"))); + TokenStream ts = new WhitespaceTokenizer(cs); + AssertTokenStreamContents(ts, new System.String[]{"x"}, new int[]{0}, new int[]{1}); + } + + [Test] + public virtual void Test1to1() + { + 
CharStream cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("h"))); + TokenStream ts = new WhitespaceTokenizer(cs); + AssertTokenStreamContents(ts, new System.String[]{"i"}, new int[]{0}, new int[]{1}); + } + + [Test] + public virtual void Test1to2() + { + CharStream cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("j"))); + TokenStream ts = new WhitespaceTokenizer(cs); + AssertTokenStreamContents(ts, new System.String[]{"jj"}, new int[]{0}, new int[]{1}); + } + + [Test] + public virtual void Test1to3() + { + CharStream cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("k"))); + TokenStream ts = new WhitespaceTokenizer(cs); + AssertTokenStreamContents(ts, new System.String[]{"kkk"}, new int[]{0}, new int[]{1}); + } + + [Test] + public virtual void Test2to4() + { + CharStream cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("ll"))); + TokenStream ts = new WhitespaceTokenizer(cs); + AssertTokenStreamContents(ts, new System.String[]{"llll"}, new int[]{0}, new int[]{2}); + } + + [Test] + public virtual void Test2to1() + { + CharStream cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("aa"))); + TokenStream ts = new WhitespaceTokenizer(cs); + AssertTokenStreamContents(ts, new System.String[]{"a"}, new int[]{0}, new int[]{2}); + } + + [Test] + public virtual void Test3to1() + { + CharStream cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("bbb"))); + TokenStream ts = new WhitespaceTokenizer(cs); + AssertTokenStreamContents(ts, new System.String[]{"b"}, new int[]{0}, new int[]{3}); + } + + [Test] + public virtual void Test4to2() + { + CharStream cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("cccc"))); + TokenStream ts = new WhitespaceTokenizer(cs); + AssertTokenStreamContents(ts, new System.String[]{"cc"}, new int[]{0}, new int[]{4}); + } + + [Test] + public virtual void 
Test5to0() + { + CharStream cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("empty"))); + TokenStream ts = new WhitespaceTokenizer(cs); + AssertTokenStreamContents(ts, new System.String[0]); + } + + // + // 1111111111222 + // 01234567890123456789012 + //(in) h i j k ll cccc bbb aa + // + // 1111111111222 + // 01234567890123456789012 + //(out) i i jj kkk llll cc b a + // + // h, 0, 1 => i, 0, 1 + // i, 2, 3 => i, 2, 3 + // j, 4, 5 => jj, 4, 5 + // k, 6, 7 => kkk, 6, 7 + // ll, 8,10 => llll, 8,10 + // cccc,11,15 => cc,11,15 + // bbb,16,19 => b,16,19 + // aa,20,22 => a,20,22 + // + [Test] + public virtual void TestTokenStream() + { + CharStream cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("h i j k ll cccc bbb aa"))); + TokenStream ts = new WhitespaceTokenizer(cs); + AssertTokenStreamContents(ts, new System.String[]{"i", "i", "jj", "kkk", "llll", "cc", "b", "a"}, new int[]{0, 2, 4, 6, 8, 11, 16, 20}, new int[]{1, 3, 5, 7, 10, 15, 19, 22}); + } + + // + // + // 0123456789 + //(in) aaaa ll h + //(out-1) aa llll i + //(out-2) a llllllll i + // + // aaaa,0,4 => a,0,4 + // ll,5,7 => llllllll,5,7 + // h,8,9 => i,8,9 + [Test] + public virtual void TestChained() + { + CharStream cs = new MappingCharFilter(normMap, new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("aaaa ll h")))); + TokenStream ts = new WhitespaceTokenizer(cs); + AssertTokenStreamContents(ts, new System.String[]{"a", "llllllll", "i"}, new int[]{0, 5, 8}, new int[]{4, 7, 9}); + } + } +} \ No newline at end of file Added: incubator/lucene.net/trunk/C#/src/Test/Analysis/TestNumericTokenStream.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/Analysis/TestNumericTokenStream.cs?rev=832486&view=auto ============================================================================== --- incubator/lucene.net/trunk/C#/src/Test/Analysis/TestNumericTokenStream.cs (added) +++ 
incubator/lucene.net/trunk/C#/src/Test/Analysis/TestNumericTokenStream.cs Tue Nov 3 18:06:27 2009
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using NUnit.Framework;
+
+using TermAttribute = Lucene.Net.Analysis.Tokenattributes.TermAttribute;
+using TypeAttribute = Lucene.Net.Analysis.Tokenattributes.TypeAttribute;
+using NumericUtils = Lucene.Net.Util.NumericUtils;
+
+namespace Lucene.Net.Analysis
+{
+
+    /// <summary>
+    /// Tests for NumericTokenStream: checks the sequence of prefix-coded terms and
+    /// token types produced for a long and an int value, and checks that a stream
+    /// without a value refuses to be used.
+    /// </summary>
+    [TestFixture]
+    public class TestNumericTokenStream:BaseTokenStreamTestCase
+    {
+
+        // Fixed sample values fed to the stream by the tests below.
+        internal const long lvalue = 4573245871874382L;
+        internal const int ivalue = 123456;
+
+        /// <summary>
+        /// Sets a long value and walks shift = 0, step, 2*step, ... below 64
+        /// (step = NumericUtils.PRECISION_STEP_DEFAULT). Each iteration must yield one
+        /// token whose term equals NumericUtils.LongToPrefixCoded(lvalue, shift); the
+        /// shift-0 token must be typed full precision and all others lower precision.
+        /// After the loop the stream must be exhausted.
+        /// </summary>
+        [Test]
+        public virtual void TestLongStream()
+        {
+            NumericTokenStream stream = new NumericTokenStream().SetLongValue(lvalue);
+            // Fetch the attributes with GetAttribute to verify that they really exist on
+            // the stream; if one were missing, the call is expected to throw.
+            TermAttribute termAtt = (TermAttribute) stream.GetAttribute(typeof(TermAttribute));
+            TypeAttribute typeAtt = (TypeAttribute) stream.GetAttribute(typeof(TypeAttribute));
+            for (int shift = 0; shift < 64; shift += NumericUtils.PRECISION_STEP_DEFAULT)
+            {
+                Assert.IsTrue(stream.IncrementToken(), "New token is available");
+                Assert.AreEqual(NumericUtils.LongToPrefixCoded(lvalue, shift), termAtt.Term(), "Term is correctly encoded");
+                Assert.AreEqual((shift == 0)?NumericTokenStream.TOKEN_TYPE_FULL_PREC:NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.Type(), "Type correct");
+            }
+            Assert.IsFalse(stream.IncrementToken(), "No more tokens available");
+        }
+
+        /// <summary>
+        /// Same check as TestLongStream for an int value: shift runs below 32 and the
+        /// expected term is NumericUtils.IntToPrefixCoded(ivalue, shift).
+        /// </summary>
+        [Test]
+        public virtual void TestIntStream()
+        {
+            NumericTokenStream stream = new NumericTokenStream().SetIntValue(ivalue);
+            // Fetch the attributes with GetAttribute to verify that they really exist on
+            // the stream; if one were missing, the call is expected to throw.
+            TermAttribute termAtt = (TermAttribute) stream.GetAttribute(typeof(TermAttribute));
+            TypeAttribute typeAtt = (TypeAttribute) stream.GetAttribute(typeof(TypeAttribute));
+            for (int shift = 0; shift < 32; shift += NumericUtils.PRECISION_STEP_DEFAULT)
+            {
+                Assert.IsTrue(stream.IncrementToken(), "New token is available");
+                Assert.AreEqual(NumericUtils.IntToPrefixCoded(ivalue, shift), termAtt.Term(), "Term is correctly encoded");
+                Assert.AreEqual((shift == 0)?NumericTokenStream.TOKEN_TYPE_FULL_PREC:NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.Type(), "Type correct");
+            }
+            Assert.IsFalse(stream.IncrementToken(), "No more tokens available");
+        }
+
+        /// <summary>
+        /// A stream on which neither SetLongValue nor SetIntValue was called must throw
+        /// from both Reset() and IncrementToken().
+        /// </summary>
+        [Test]
+        public virtual void TestNotInitialized()
+        {
+            NumericTokenStream stream = new NumericTokenStream();
+
+            // NOTE(review): SystemException is a broad base type, so any runtime failure
+            // is accepted here; narrow it once the exact exception type thrown by
+            // NumericTokenStream is confirmed.
+            try
+            {
+                stream.Reset();
+                Assert.Fail("reset() should not succeed.");
+            }
+            catch (System.SystemException)
+            {
+                // pass: the uninitialized stream rejected Reset()
+            }
+
+            try
+            {
+                stream.IncrementToken();
+                Assert.Fail("incrementToken() should not succeed.");
+            }
+            catch (System.SystemException)
+            {
+                // pass: the uninitialized stream rejected IncrementToken()
+            }
+        }
+    }
+}
\ No newline at end of file