From: uschindler@apache.org
To: java-commits@lucene.apache.org
Subject: svn commit: r807190 [2/2] - in /lucene/java/trunk: contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/ contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/ contrib/analyzers/common/src/test/org/apache/lucene/analysis...
Date: Mon, 24 Aug 2009 12:44:16 -0000
Message-Id: <20090824124418.56C2C23888FF@eris.apache.org>

Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java?rev=807190&r1=807189&r2=807190&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java Mon Aug 24 12:44:13 2009
@@ -17,15 +17,14 @@
  * limitations under the License.
  */
-import junit.framework.TestCase;
-
+import org.apache.lucene.util.LuceneTestCase;
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.InputStreamReader;
 import java.io.FileInputStream;
 import java.util.ArrayList;
 
-public class TestRussianStem extends TestCase
+public class TestRussianStem extends LuceneTestCase
 {
     private ArrayList words = new ArrayList();
     private ArrayList stems = new ArrayList();
Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java?rev=807190&r1=807189&r2=807190&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java Mon Aug 24 12:44:13 2009
@@ -20,15 +20,14 @@
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.LetterTokenizer;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.*;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexWriter;
@@ -44,19 +43,13 @@
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
 
-import junit.framework.TestCase;
-
 /**
  * A test class for ShingleAnalyzerWrapper as regards queries and scoring.
  */
-public class ShingleAnalyzerWrapperTest extends TestCase {
+public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
   public IndexSearcher searcher;
 
-  public static void main(String[] args) {
-    junit.textui.TestRunner.run(ShingleAnalyzerWrapperTest.class);
-  }
-
   /**
    * Set up a new index in RAM with three test phrases and the supplied Analyzer.
    *
@@ -233,8 +226,7 @@
     assertAnalyzesToReuse(a, "this is a test", new String[] { "this", "is", "a", "test" },
         new int[] { 0, 5, 8, 10 },
-        new int[] { 4, 7, 9, 14 },
-        new int[] { 1, 1, 1, 1 });
+        new int[] { 4, 7, 9, 14 });
   }
 
  /*
@@ -269,25 +261,4 @@
         new int[] { 6, 13, 13, 18, 18, 27, 27 },
         new int[] { 1, 0, 1, 0, 1, 0, 1 });
   }
-
-  private void assertAnalyzesToReuse(Analyzer a, String input, String[] output,
-      int[] startOffsets, int[] endOffsets, int[] posIncr) throws Exception {
-    TokenStream ts = a.reusableTokenStream("dummy", new StringReader(input));
-    TermAttribute termAtt = (TermAttribute) ts
-        .getAttribute(TermAttribute.class);
-    OffsetAttribute offsetAtt = (OffsetAttribute) ts
-        .getAttribute(OffsetAttribute.class);
-    PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) ts
-        .getAttribute(PositionIncrementAttribute.class);
-
-    for (int i = 0; i < output.length; i++) {
-      assertTrue(ts.incrementToken());
-      assertEquals(output[i], termAtt.term());
-      assertEquals(startOffsets[i], offsetAtt.startOffset());
-      assertEquals(endOffsets[i], offsetAtt.endOffset());
-      assertEquals(posIncr[i], posIncAtt.getPositionIncrement());
-    }
-
-    assertFalse(ts.incrementToken());
-  }
 }
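The hunk at @@ -233 above drops the fourth array (the all-ones position increments) rather than moving it, because every check array in the inherited helpers is optional: each shorter overload forwards to the full form with null for the arrays it does not check, and null arrays are skipped during assertion (see the BaseTokenStreamTestCase diff at the end of this commit). A sketch of what the shortened call now means, with the forwarding target spelled out:

    // the five-argument call used above ...
    assertAnalyzesToReuse(a, "this is a test",
        new String[] { "this", "is", "a", "test" },
        new int[] { 0, 5, 8, 10 },
        new int[] { 4, 7, 9, 14 });
    // ... is equivalent to the full form with types and position
    // increments left unchecked:
    assertAnalyzesToReuse(a, "this is a test",
        new String[] { "this", "is", "a", "test" },
        new int[] { 0, 5, 8, 10 },
        new int[] { 4, 7, 9, 14 },
        null, null);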
Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java?rev=807190&r1=807189&r2=807190&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java Mon Aug 24 12:44:13 2009
@@ -20,18 +20,14 @@
 import java.io.IOException;
 import java.io.StringReader;
 
-import junit.framework.TestCase;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttributeImpl;
+import org.apache.lucene.analysis.tokenattributes.*;
 
-public class ShingleFilterTest extends TestCase {
+public class ShingleFilterTest extends BaseTokenStreamTestCase {
 
   public class TestTokenStream extends TokenStream {
 
@@ -53,6 +49,7 @@
     }
 
     public final boolean incrementToken() throws IOException {
+      clearAttributes();
       if (index < testToken.length) {
         Token t = testToken[index++];
         termAtt.setTermBuffer(t.termBuffer(), 0, t.termLength());
@@ -66,10 +63,6 @@
     }
   }
 
-  public static void main(String[] args) {
-    junit.textui.TestRunner.run(ShingleFilterTest.class);
-  }
-
   public static final Token[] TEST_TOKEN = new Token[] {
       createToken("please", 0, 6),
       createToken("divide", 7, 13),
@@ -188,15 +181,19 @@
   public void testReset() throws Exception {
     Tokenizer wsTokenizer = new WhitespaceTokenizer(new StringReader("please divide this sentence"));
     TokenStream filter = new ShingleFilter(wsTokenizer, 2);
-    TermAttribute termAtt = (TermAttribute) filter.getAttribute(TermAttribute.class);
-
-    assertTrue(filter.incrementToken());
-    assertEquals("(please,0,6)", termAtt.toString());
-    assertTrue(filter.incrementToken());
-    assertEquals("(please divide,0,13,type=shingle,posIncr=0)", termAtt.toString());
+    assertTokenStreamContents(filter,
+      new String[]{"please","please divide","divide","divide this","this","this sentence","sentence"},
+      new int[]{0,0,7,7,14,14,19}, new int[]{6,13,13,18,18,27,27},
+      new String[]{TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE},
+      new int[]{1,0,1,0,1,0,1}
+    );
     wsTokenizer.reset(new StringReader("please divide this sentence"));
-    filter.reset();
-    assertTrue(filter.incrementToken());
-    assertEquals("(please,0,6)", termAtt.toString());
+    assertTokenStreamContents(filter,
+      new String[]{"please","please divide","divide","divide this","this","this sentence","sentence"},
+      new int[]{0,0,7,7,14,14,19}, new int[]{6,13,13,18,18,27,27},
+      new String[]{TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE},
+      new int[]{1,0,1,0,1,0,1}
+    );
   }
 
   protected void shingleFilterTest(int maxSize, Token[] tokensToShingle, Token[] tokensToCompare,
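The added clearAttributes() call in TestTokenStream.incrementToken() above is easy to overlook but is part of the new TokenStream API contract: a token producer must reset all attributes before populating the current token, otherwise any attribute it does not set on every call (type, position increment, payload) leaks over from the previous token. A minimal conforming producer, sketched with illustrative names that are not part of this commit:

    import java.io.IOException;
    import org.apache.lucene.analysis.Token;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
    import org.apache.lucene.analysis.tokenattributes.TermAttribute;

    class ListTokenStream extends TokenStream {
      private final Token[] tokens;
      private int index = 0;
      private final TermAttribute termAtt =
          (TermAttribute) addAttribute(TermAttribute.class);
      private final OffsetAttribute offsetAtt =
          (OffsetAttribute) addAttribute(OffsetAttribute.class);

      ListTokenStream(Token[] tokens) {
        this.tokens = tokens;
      }

      public boolean incrementToken() throws IOException {
        if (index >= tokens.length) return false;
        clearAttributes(); // also resets attributes this stream never writes
        Token t = tokens[index++];
        termAtt.setTermBuffer(t.termBuffer(), 0, t.termLength());
        offsetAtt.setOffset(t.startOffset(), t.endOffset());
        return true;
      }
    }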
Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java?rev=807190&r1=807189&r2=807190&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java Mon Aug 24 12:44:13 2009
@@ -21,9 +21,10 @@
 import java.util.Collection;
 import java.util.Iterator;
 import java.util.LinkedList;
+import java.util.HashSet;
+import java.util.Arrays;
 
-import junit.framework.TestCase;
-
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CachingTokenFilter;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
@@ -33,15 +34,16 @@
 import org.apache.lucene.analysis.payloads.PayloadHelper;
 import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix;
 import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix.Column;
-import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.analysis.tokenattributes.*;
 
-public class TestShingleMatrixFilter extends TestCase {
+public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
 
+  public TestShingleMatrixFilter(String name) {
+    // use this ctor, because SingleTokenTokenStream only uses next(Token), so exclude it
+    super(name, new HashSet(Arrays.asList(new String[]{
+      "testBehavingAsShingleFilter", "testMatrix"
+    })));
+  }
 
   public void testBehavingAsShingleFilter() throws IOException {
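The two-argument super(name, Set) constructor above is how a subclass opts individual tests out of the second, new-API-only pass that BaseTokenStreamTestCase.runBare() performs: SingleTokenTokenStream still produces tokens via the deprecated next(Token), so the two listed tests can only run while the old API is enabled. The diff of BaseTokenStreamTestCase further down shows runBare() only partially, so the following sketch of the control flow is an assumption, with a made-up field name:

    // hypothetical sketch, not code from this commit:
    public void runBare() throws Throwable {
      TokenStream.setOnlyUseNewAPI(false); // first pass: old API still allowed
      super.runBare();
      if (!oldApiOnlyTests.contains(getName())) { // set from the constructor
        TokenStream.setOnlyUseNewAPI(true);       // second pass: new API enforced
        try {
          super.runBare();
        } finally {
          TokenStream.setOnlyUseNewAPI(false);
        }
      }
    }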
Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java?rev=807190&r1=807189&r2=807190&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java Mon Aug 24 12:44:13 2009
@@ -21,26 +21,18 @@
 import java.text.SimpleDateFormat;
 import java.util.Locale;
 
-import junit.framework.TestCase;
-
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TeeSinkTokenFilter;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkTokenStream;
 
-public class DateRecognizerSinkTokenizerTest extends TestCase {
+public class DateRecognizerSinkTokenizerTest extends BaseTokenStreamTestCase {
 
   public DateRecognizerSinkTokenizerTest(String s) {
     super(s);
   }
 
-  protected void setUp() {
-  }
-
-  protected void tearDown() {
-
-  }
-
   public void test() throws IOException {
     DateRecognizerSinkFilter sinkFilter = new DateRecognizerSinkFilter(new SimpleDateFormat("MM/dd/yyyy", Locale.US));
     String test = "The quick red fox jumped over the lazy brown dogs on 7/11/2006 The dogs finally reacted on 7/12/2006";

Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java?rev=807190&r1=807189&r2=807190&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java Mon Aug 24 12:44:13 2009
@@ -19,26 +19,18 @@
 import java.io.IOException;
 import java.io.StringReader;
 
-import junit.framework.TestCase;
-
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TeeSinkTokenFilter;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkTokenStream;
 
-public class TokenRangeSinkTokenizerTest extends TestCase {
+public class TokenRangeSinkTokenizerTest extends BaseTokenStreamTestCase {
 
   public TokenRangeSinkTokenizerTest(String s) {
     super(s);
   }
 
-  protected void setUp() {
-  }
-
-  protected void tearDown() {
-
-  }
-
   public void test() throws IOException {
     TokenRangeSinkFilter sinkFilter = new TokenRangeSinkFilter(2, 4);
     String test = "The quick red fox jumped over the lazy brown dogs";
Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java?rev=807190&r1=807189&r2=807190&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java Mon Aug 24 12:44:13 2009
@@ -19,8 +19,7 @@
 import java.io.IOException;
 import java.io.StringReader;
 
-import junit.framework.TestCase;
-
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TeeSinkTokenFilter;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
@@ -29,20 +28,13 @@
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 
-public class TokenTypeSinkTokenizerTest extends TestCase {
+public class TokenTypeSinkTokenizerTest extends BaseTokenStreamTestCase {
 
   public TokenTypeSinkTokenizerTest(String s) {
     super(s);
   }
 
-  protected void setUp() {
-  }
-
-  protected void tearDown() {
-
-  }
-
   public void test() throws IOException {
     TokenTypeSinkFilter sinkFilter = new TokenTypeSinkFilter("D");
     String test = "The quick red fox jumped over the lazy brown dogs";
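In the three sink tokenizer tests above (and in WikipediaTokenizerTest below), deleting the empty setUp()/tearDown() overrides is not just cosmetic: the empty bodies never called super, so they silently disabled whatever per-test bookkeeping LuceneTestCase performs around each test. A subclass that genuinely needs fixture code has to chain, as in this sketch (not taken from the commit):

    protected void setUp() throws Exception {
      super.setUp();   // keep the base class bookkeeping
      // ... initialize test fixtures here ...
    }

    protected void tearDown() throws Exception {
      // ... release test fixtures here ...
      super.tearDown();
    }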
Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java?rev=807190&r1=807189&r2=807190&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java Mon Aug 24 12:44:13 2009
@@ -18,16 +18,11 @@
  */
 
 import java.io.Reader;
-import java.io.StringReader;
-
-import junit.framework.TestCase;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 
 /**
  * Test case for ThaiAnalyzer, modified from TestFrenchAnalyzer
@@ -35,7 +30,7 @@
  * @version 0.1
  */
 
-public class TestThaiAnalyzer extends TestCase {
+public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
 
   /*
    * testcase for offsets
@@ -71,56 +66,6 @@
       new String[] { "", "", "", "", "", "" });
   }
   */
-
-  public void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[])
-    throws Exception {
-
-    TokenStream ts = a.tokenStream("dummy", new StringReader(input));
-    TermAttribute termAtt = (TermAttribute) ts.addAttribute(TermAttribute.class);
-    OffsetAttribute offsetAtt = (OffsetAttribute) ts.addAttribute(OffsetAttribute.class);
-    TypeAttribute typeAtt = (TypeAttribute) ts.addAttribute(TypeAttribute.class);
-    for (int i = 0; i < output.length; i++) {
-      assertTrue(ts.incrementToken());
-      assertEquals(termAtt.term(), output[i]);
-      if (startOffsets != null)
-        assertEquals(offsetAtt.startOffset(), startOffsets[i]);
-      if (endOffsets != null)
-        assertEquals(offsetAtt.endOffset(), endOffsets[i]);
-      if (types != null)
-        assertEquals(typeAtt.type(), types[i]);
-    }
-    assertFalse(ts.incrementToken());
-    ts.close();
-  }
-
-  public void assertAnalyzesToReuse(Analyzer a, String input, String[] output)
-    throws Exception {
-
-    TokenStream ts = a.reusableTokenStream("dummy", new StringReader(input));
-    TermAttribute termAtt = (TermAttribute) ts
-        .addAttribute(TermAttribute.class);
-    OffsetAttribute offsetAtt = (OffsetAttribute) ts
-        .addAttribute(OffsetAttribute.class);
-    TypeAttribute typeAtt = (TypeAttribute) ts
-        .addAttribute(TypeAttribute.class);
-    for (int i = 0; i < output.length; i++) {
-      assertTrue(ts.incrementToken());
-      assertEquals(termAtt.term(), output[i]);
-    }
-    assertFalse(ts.incrementToken());
-  }
-
-  public void assertAnalyzesTo(Analyzer a, String input, String[] output) throws Exception {
-    assertAnalyzesTo(a, input, output, null, null, null);
-  }
-
-  public void assertAnalyzesTo(Analyzer a, String input, String[] output, String[] types) throws Exception {
-    assertAnalyzesTo(a, input, output, null, null, types);
-  }
-
-  public void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[]) throws Exception {
-    assertAnalyzesTo(a, input, output, startOffsets, endOffsets, null);
-  }
 
   public void testAnalyzer() throws Exception {
     ThaiAnalyzer analyzer = new ThaiAnalyzer();
Modified: lucene/java/trunk/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/TestSmartChineseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/TestSmartChineseAnalyzer.java?rev=807190&r1=807189&r2=807190&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/TestSmartChineseAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/TestSmartChineseAnalyzer.java Mon Aug 24 12:44:13 2009
@@ -20,20 +20,13 @@
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.Reader;
-import java.io.StringReader;
 import java.io.UnsupportedEncodingException;
 import java.util.Date;
 
-import junit.framework.TestCase;
-
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 
-public class TestSmartChineseAnalyzer extends TestCase {
+public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
 
   public void testChineseStopWordsDefault() throws Exception {
     Analyzer ca = new SmartChineseAnalyzer(); /* will load stopwords */
@@ -77,20 +70,11 @@
     assertAnalyzesTo(ca, sentence, result);
   }
 
-  public void testChineseAnalyzer() throws IOException {
-    Token nt = new Token();
+  public void testChineseAnalyzer() throws Exception {
     Analyzer ca = new SmartChineseAnalyzer(true);
-    Reader sentence = new StringReader("我购买了道具和服装。");
+    String sentence = "我购买了道具和服装。";
     String[] result = { "我", "购买", "了", "道具", "和", "服装" };
-    TokenStream ts = ca.tokenStream("sentence", sentence);
-    int i = 0;
-    nt = ts.next(nt);
-    while (nt != null) {
-      assertEquals(result[i], nt.term());
-      i++;
-      nt = ts.next(nt);
-    }
-    ts.close();
+    assertAnalyzesTo(ca, sentence, result);
  }
 
  /*
@@ -165,90 +149,4 @@
         new int[] { 0, 1, 3, 4, 6, 7 },
         new int[] { 1, 3, 4, 6, 7, 9 });
   }
-
-  public void assertAnalyzesToReuse(Analyzer a, String input, String[] output,
-      int startOffsets[], int endOffsets[]) throws Exception {
-
-    TokenStream ts = a.reusableTokenStream("dummy", new StringReader(input));
-    TermAttribute termAtt = (TermAttribute) ts.getAttribute(TermAttribute.class);
-    OffsetAttribute offsetAtt = (OffsetAttribute) ts.getAttribute(OffsetAttribute.class);
-    for (int i = 0; i < output.length; i++) {
-      assertTrue(ts.incrementToken());
-      assertEquals(termAtt.term(), output[i]);
-      assertEquals(offsetAtt.startOffset(), startOffsets[i]);
-      assertEquals(offsetAtt.endOffset(), endOffsets[i]);
-    }
-    assertFalse(ts.incrementToken());
-  }
-
-  public void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[])
-    throws Exception {
-
-    TokenStream ts = a.tokenStream("dummy", new StringReader(input));
-    TermAttribute termAtt = (TermAttribute) ts.getAttribute(TermAttribute.class);
-    OffsetAttribute offsetAtt = (OffsetAttribute) ts.getAttribute(OffsetAttribute.class);
-    TypeAttribute typeAtt = (TypeAttribute) ts.getAttribute(TypeAttribute.class);
-    for (int i = 0; i < output.length; i++) {
-      assertTrue(ts.incrementToken());
-      assertEquals(termAtt.term(), output[i]);
-      if (startOffsets != null)
-        assertEquals(offsetAtt.startOffset(), startOffsets[i]);
-      if (endOffsets != null)
-        assertEquals(offsetAtt.endOffset(), endOffsets[i]);
-      if (types != null)
-        assertEquals(typeAtt.type(), types[i]);
-    }
-    assertFalse(ts.incrementToken());
-    ts.close();
-  }
-
-public void assertAnalyzesTo(Analyzer a, String input, String[] output) throws Exception {
-  assertAnalyzesTo(a, input, output, null, null, null);
-}
-
-public void assertAnalyzesTo(Analyzer a, String input, String[] output, String[] types) throws Exception {
-  assertAnalyzesTo(a, input, output, null, null, types);
-}
-
-public void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[]) throws Exception {
-  assertAnalyzesTo(a, input, output, startOffsets, endOffsets, null);
-}
-
-  /**
-   * @param args
-   * @throws IOException
-   */
-  public static void main(String[] args) throws IOException {
-    new TestSmartChineseAnalyzer().sampleMethod();
-  }
-
-  /**
-   * @throws UnsupportedEncodingException
-   * @throws FileNotFoundException
-   * @throws IOException
-   */
-  private void sampleMethod() throws UnsupportedEncodingException,
-      FileNotFoundException, IOException {
-    Token nt = new Token();
-    Analyzer ca = new SmartChineseAnalyzer(true);
-    Reader sentence = new StringReader(
-        "我从小就不由自主地认为自己长大以后一定得成为一个象我父亲一样的画家, 可能是父母潜移默化的影响。其实我根本不知道作为画家意味着什么,我是否喜欢,最重要的是否适合我,我是否有这个才华。其实人到中年的我还是不确定我最喜欢什么,最想做的是什么?我相信很多人和我一样有同样的烦恼。毕竟不是每个人都能成为作文里的宇航员,科学家和大教授。知道 自己适合做什么,喜欢做什么,能做好什么其实是个非常困难的问题。"
-        + "幸运的是,我想我的孩子不会为这个太过烦恼。通过老大,我慢慢发现美国高中的一个重要功能就是帮助学生分析他们的专长和兴趣,从而帮助他们选择大学的专业和未来的职业。我觉得帮助一个未成形的孩子找到她未来成长的方向是个非常重要的过程。"
-        + "美国高中都有专门的职业顾问,通过接触不同的课程,和各种心理,个性,兴趣很多方面的问答来帮助每个学生找到最感兴趣的专业。这样的教育一般是要到高年级才开始, 可老大因为今年上计算机的课程就是研究一个职业走向的软件项目,所以她提前做了这些考试和面试。看来以后这样的教育会慢慢由电脑来测试了。老大带回家了一些试卷,我挑出一些给大家看看。这门课她花了2个多月才做完,这里只是很小的一部分。"
-        + "在测试里有这样的一些问题:"
-        + "你是个喜欢动手的人吗? 你喜欢修东西吗?你喜欢体育运动吗?你喜欢在室外工作吗?你是个喜欢思考的人吗?你喜欢数学和科学课吗?你喜欢一个人工作吗?你对自己的智力自信吗?你的创造能力很强吗?你喜欢艺术,音乐和戏剧吗? 你喜欢自由自在的工作环境吗?你喜欢尝试新的东西吗? 你喜欢帮助别人吗？你喜欢教别人吗？你喜欢和机器和工具打交道吗？你喜欢当领导吗？你喜欢组织活动吗？你什么和数字打交道吗？");
-    TokenStream ts = ca.tokenStream("sentence", sentence);
-
-    System.out.println("start: " + (new Date()));
-    long before = System.currentTimeMillis();
-    nt = ts.next(nt);
-    while (nt != null) {
-      System.out.println(nt.term());
-      nt = ts.next(nt);
-    }
-    ts.close();
-    long now = System.currentTimeMillis();
-    System.out.println("time: " + (now - before) / 1000.0 + " s");
-  }
 }
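The deleted testChineseAnalyzer body and sampleMethod() above were the last consumers of the deprecated next(Token) pull API in this file. Under the attribute-based API the same loop reads a single attribute instance that incrementToken() updates in place; a minimal sketch using only calls that appear elsewhere in this commit (the class and method names are illustrative):

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.TermAttribute;

    class PrintTerms {
      static void printTerms(Analyzer a, String text) throws IOException {
        TokenStream ts = a.tokenStream("sentence", new StringReader(text));
        TermAttribute termAtt = (TermAttribute) ts.addAttribute(TermAttribute.class);
        // replaces: nt = ts.next(nt); while (nt != null) { ... }
        while (ts.incrementToken()) {
          System.out.println(termAtt.term());
        }
        ts.close();
      }
    }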
Modified: lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/TestSynonymTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/TestSynonymTokenFilter.java?rev=807190&r1=807189&r2=807190&view=diff
==============================================================================
--- lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/TestSynonymTokenFilter.java (original)
+++ lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/TestSynonymTokenFilter.java Mon Aug 24 12:44:13 2009
@@ -28,9 +28,6 @@
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 
 public class TestSynonymTokenFilter extends BaseTokenStreamTestCase {
@@ -117,44 +114,4 @@
     }
   }
 
-  public void assertAnalyzesTo(Analyzer a, String input, String[] output,
-      int startOffsets[], int endOffsets[], int posIncs[]) throws Exception {
-
-    TokenStream ts = a.tokenStream("dummy", new StringReader(input));
-    TermAttribute termAtt = (TermAttribute) ts
-        .getAttribute(TermAttribute.class);
-    OffsetAttribute offsetAtt = (OffsetAttribute) ts
-        .getAttribute(OffsetAttribute.class);
-    PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) ts
-        .getAttribute(PositionIncrementAttribute.class);
-    for (int i = 0; i < output.length; i++) {
-      assertTrue(ts.incrementToken());
-      assertEquals(termAtt.term(), output[i]);
-      assertEquals(offsetAtt.startOffset(), startOffsets[i]);
-      assertEquals(offsetAtt.endOffset(), endOffsets[i]);
-      assertEquals(posIncAtt.getPositionIncrement(), posIncs[i]);
-    }
-    assertFalse(ts.incrementToken());
-    ts.close();
-  }
-
-  public void assertAnalyzesToReuse(Analyzer a, String input, String[] output,
-      int startOffsets[], int endOffsets[], int posIncs[]) throws Exception {
-
-    TokenStream ts = a.reusableTokenStream("dummy", new StringReader(input));
-    TermAttribute termAtt = (TermAttribute) ts
-        .getAttribute(TermAttribute.class);
-    OffsetAttribute offsetAtt = (OffsetAttribute) ts
-        .getAttribute(OffsetAttribute.class);
-    PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) ts
-        .getAttribute(PositionIncrementAttribute.class);
-    for (int i = 0; i < output.length; i++) {
-      assertTrue(ts.incrementToken());
-      assertEquals(termAtt.term(), output[i]);
-      assertEquals(offsetAtt.startOffset(), startOffsets[i]);
-      assertEquals(offsetAtt.endOffset(), endOffsets[i]);
-      assertEquals(posIncAtt.getPositionIncrement(), posIncs[i]);
-    }
-    assertFalse(ts.incrementToken());
-  }
 }
Modified: lucene/java/trunk/contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java?rev=807190&r1=807189&r2=807190&view=diff
==============================================================================
--- lucene/java/trunk/contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java (original)
+++ lucene/java/trunk/contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java Mon Aug 24 12:44:13 2009
@@ -20,8 +20,7 @@
 import java.io.Reader;
 import java.io.StringReader;
 
-import junit.framework.TestCase;
-
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.index.Payload;
@@ -33,32 +32,7 @@
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 
-public class TestSnowball extends TestCase {
-
-  public void assertAnalyzesTo(Analyzer a,
-                               String input,
-                               String[] output) throws Exception {
-    TokenStream ts = a.tokenStream("dummy", new StringReader(input));
-    TermAttribute termAtt = (TermAttribute) ts.getAttribute(TermAttribute.class);
-    for (int i = 0; i < output.length; i++) {
-      assertTrue(ts.incrementToken());
-      assertEquals(output[i], termAtt.term());
-    }
-    assertFalse(ts.incrementToken());
-    ts.close();
-  }
-
-  public void assertAnalyzesToReuse(Analyzer a,
-                                    String input,
-                                    String[] output) throws Exception {
-    TokenStream ts = a.reusableTokenStream("dummy", new StringReader(input));
-    TermAttribute termAtt = (TermAttribute) ts.getAttribute(TermAttribute.class);
-    for (int i = 0; i < output.length; i++) {
-      assertTrue(ts.incrementToken());
-      assertEquals(output[i], termAtt.term());
-    }
-    assertFalse(ts.incrementToken());
-  }
+public class TestSnowball extends BaseTokenStreamTestCase {
 
   public void testEnglish() throws Exception {
     Analyzer a = new SnowballAnalyzer("English");
Modified: lucene/java/trunk/contrib/wikipedia/src/test/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/wikipedia/src/test/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerTest.java?rev=807190&r1=807189&r2=807190&view=diff
==============================================================================
--- lucene/java/trunk/contrib/wikipedia/src/test/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerTest.java (original)
+++ lucene/java/trunk/contrib/wikipedia/src/test/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerTest.java Mon Aug 24 12:44:13 2009
@@ -27,6 +27,7 @@
 import java.util.Set;
 import java.util.HashSet;
 
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -38,22 +39,13 @@
  *
  *
  **/
-public class WikipediaTokenizerTest extends TestCase {
+public class WikipediaTokenizerTest extends BaseTokenStreamTestCase {
   protected static final String LINK_PHRASES = "click [[link here again]] click [http://lucene.apache.org here again] [[Category:a b c d]]";
-
   public WikipediaTokenizerTest(String s) {
     super(s);
   }
 
-  protected void setUp() {
-  }
-
-  protected void tearDown() {
-
-  }
-
-
   public void testHandwritten() throws Exception {
     //make sure all tokens are in only one type
     String test = "[[link]] This is a [[Category:foo]] Category This is a linked [[:Category:bar none withstanding]] " +

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java?rev=807190&r1=807189&r2=807190&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java Mon Aug 24 12:44:13 2009
@@ -866,6 +866,9 @@
       if (payload !=null) {
         to.payload = (Payload) payload.clone();
       }
+    // remove the following optimization in 3.0 when old TokenStream API removed:
+    } else if (target instanceof TokenWrapper) {
+      ((TokenWrapper) target).delegate = (Token) this.clone();
     } else {
       initTermBuffer();
       ((TermAttribute) target).setTermBuffer(termBuffer, 0, termLength);
Modified: lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java?rev=807190&r1=807189&r2=807190&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java Mon Aug 24 12:44:13 2009
@@ -18,7 +18,10 @@
  */
 
 import java.util.Set;
+import java.io.StringReader;
+import java.io.IOException;
 
+import org.apache.lucene.analysis.tokenattributes.*;
 import org.apache.lucene.util.LuceneTestCase;
 
 /**
@@ -59,12 +62,6 @@
   }
 
   // @Override
-  protected void tearDown() throws Exception {
-    TokenStream.setOnlyUseNewAPI(false);
-    super.tearDown();
-  }
-
-  // @Override
   public void runBare() throws Throwable {
     // Do the test with onlyUseNewAPI=false (default)
     try {
@@ -86,5 +83,127 @@
       }
     }
   }
+
+  // some helpers to test Analyzers and TokenStreams:
+
+  public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
+    assertNotNull(output);
+    assertTrue("has TermAttribute", ts.hasAttribute(TermAttribute.class));
+    TermAttribute termAtt = (TermAttribute) ts.getAttribute(TermAttribute.class);
+
+    OffsetAttribute offsetAtt = null;
+    if (startOffsets != null || endOffsets != null) {
+      assertTrue("has OffsetAttribute", ts.hasAttribute(OffsetAttribute.class));
+      offsetAtt = (OffsetAttribute) ts.getAttribute(OffsetAttribute.class);
+    }
+
+    TypeAttribute typeAtt = null;
+    if (types != null) {
+      assertTrue("has TypeAttribute", ts.hasAttribute(TypeAttribute.class));
+      typeAtt = (TypeAttribute) ts.getAttribute(TypeAttribute.class);
+    }
+
+    PositionIncrementAttribute posIncrAtt = null;
+    if (posIncrements != null) {
+      assertTrue("has PositionIncrementAttribute", ts.hasAttribute(PositionIncrementAttribute.class));
+      posIncrAtt = (PositionIncrementAttribute) ts.getAttribute(PositionIncrementAttribute.class);
+    }
+
+    ts.reset();
+    for (int i = 0; i < output.length; i++) {
+      assertTrue("token "+i+" exists", ts.incrementToken());
+      assertEquals("term "+i, output[i], termAtt.term());
+      if (startOffsets != null)
+        assertEquals("startOffset "+i, startOffsets[i], offsetAtt.startOffset());
+      if (endOffsets != null)
+        assertEquals("endOffset "+i, endOffsets[i], offsetAtt.endOffset());
+      if (types != null)
+        assertEquals("type "+i, types[i], typeAtt.type());
+      if (posIncrements != null)
+        assertEquals("posIncrement "+i, posIncrements[i], posIncrAtt.getPositionIncrement());
+    }
+    assertFalse("end of stream", ts.incrementToken());
+    ts.close();
+  }
+
+  public static void assertTokenStreamContents(TokenStream ts, String[] output) throws IOException {
+    assertTokenStreamContents(ts, output, null, null, null, null);
+  }
+
+  public static void assertTokenStreamContents(TokenStream ts, String[] output, String[] types) throws IOException {
+    assertTokenStreamContents(ts, output, null, null, types, null);
+  }
+
+  public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] posIncrements) throws IOException {
+    assertTokenStreamContents(ts, output, null, null, null, posIncrements);
+  }
+
+  public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[]) throws IOException {
+    assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, null);
+  }
+
+  public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements) throws IOException {
+    assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, posIncrements);
+  }
+
+  public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
+    assertTokenStreamContents(a.tokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements);
+  }
+
+  public static void assertAnalyzesTo(Analyzer a, String input, String[] output) throws IOException {
+    assertAnalyzesTo(a, input, output, null, null, null, null);
+  }
+
+  public static void assertAnalyzesTo(Analyzer a, String input, String[] output, String[] types) throws IOException {
+    assertAnalyzesTo(a, input, output, null, null, types, null);
+  }
+
+  public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int[] posIncrements) throws IOException {
+    assertAnalyzesTo(a, input, output, null, null, null, posIncrements);
+  }
+
+  public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[]) throws IOException {
+    assertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, null);
+  }
+
+  public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements) throws IOException {
+    assertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, posIncrements);
+  }
+
+
+  public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
+    assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements);
+  }
+
+  public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output) throws IOException {
+    assertAnalyzesToReuse(a, input, output, null, null, null, null);
+  }
+
+  public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, String[] types) throws IOException {
+    assertAnalyzesToReuse(a, input, output, null, null, types, null);
+  }
+
+  public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int[] posIncrements) throws IOException {
+    assertAnalyzesToReuse(a, input, output, null, null, null, posIncrements);
+  }
+
+  public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[]) throws IOException {
+    assertAnalyzesToReuse(a, input, output, startOffsets, endOffsets, null, null);
+  }
+
+  public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements) throws IOException {
+    assertAnalyzesToReuse(a, input, output, startOffsets, endOffsets, null, posIncrements);
+  }
+
+  // simple utility method for testing stemmers
+
+  public static void checkOneTerm(Analyzer a, final String input, final String expected) throws IOException {
+    assertAnalyzesTo(a, input, new String[]{expected});
+  }
+
+  public static void checkOneTermReuse(Analyzer a, final String input, final String expected) throws IOException {
+    assertAnalyzesToReuse(a, input, new String[]{expected});
+  }
+
 }
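Taken together, the helpers added above collapse the per-file boilerplate removed throughout this commit into one-liners. A typical analyzer test now reduces to something like the following sketch (WhitespaceAnalyzer is just a convenient stand-in; the test class and inputs are illustrative, not from this commit):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.BaseTokenStreamTestCase;
    import org.apache.lucene.analysis.WhitespaceAnalyzer;

    public class TestWhitespaceShape extends BaseTokenStreamTestCase {
      public void testShape() throws Exception {
        Analyzer a = new WhitespaceAnalyzer();
        // terms plus start/end offsets; the longer overloads also check
        // types and position increments
        assertAnalyzesTo(a, "foo bar",
            new String[] { "foo", "bar" },
            new int[] { 0, 4 },
            new int[] { 3, 7 });
        checkOneTerm(a, "baz", "baz"); // single-token stemmer-style check
      }
    }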
Modified: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestAnalyzers.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestAnalyzers.java?rev=807190&r1=807189&r2=807190&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestAnalyzers.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestAnalyzers.java Mon Aug 24 12:44:13 2009
@@ -33,19 +33,6 @@
     super(name);
   }
 
-  public void assertAnalyzesTo(Analyzer a,
-                               String input,
-                               String[] output) throws Exception {
-    TokenStream ts = a.tokenStream("dummy", new StringReader(input));
-    TermAttribute termAtt = (TermAttribute) ts.getAttribute(TermAttribute.class);
-    for (int i=0; i