Return-Path: Delivered-To: apmail-lucene-java-commits-archive@www.apache.org Received: (qmail 18794 invoked from network); 20 Aug 2008 14:39:12 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 20 Aug 2008 14:39:12 -0000 Received: (qmail 25072 invoked by uid 500); 20 Aug 2008 14:39:09 -0000 Delivered-To: apmail-lucene-java-commits-archive@lucene.apache.org Received: (qmail 24991 invoked by uid 500); 20 Aug 2008 14:39:09 -0000 Mailing-List: contact java-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: java-dev@lucene.apache.org Delivered-To: mailing list java-commits@lucene.apache.org Received: (qmail 24977 invoked by uid 99); 20 Aug 2008 14:39:09 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 20 Aug 2008 07:39:09 -0700 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 20 Aug 2008 14:38:18 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 5942A2388A32; Wed, 20 Aug 2008 07:38:17 -0700 (PDT) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r687357 [4/6] - in /lucene/java/trunk: ./ contrib/analyzers/src/java/org/apache/lucene/analysis/br/ contrib/analyzers/src/java/org/apache/lucene/analysis/cjk/ contrib/analyzers/src/java/org/apache/lucene/analysis/cn/ contrib/analyzers/src/j... 
Date: Wed, 20 Aug 2008 14:38:11 -0000 To: java-commits@lucene.apache.org From: mikemccand@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20080820143817.5942A2388A32@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Modified: lucene/java/trunk/contrib/wikipedia/src/test/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerTest.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/wikipedia/src/test/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerTest.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/wikipedia/src/test/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerTest.java (original) +++ lucene/java/trunk/contrib/wikipedia/src/test/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerTest.java Wed Aug 20 07:38:07 2008 @@ -126,28 +126,28 @@ tcm.put("3.25", ""); tcm.put("3.50", ""); WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test)); - Token token = new Token(); int count = 0; int numItalics = 0; int numBoldItalics = 0; int numCategory = 0; int numCitation = 0; - while ((token = tf.next(token)) != null) { - String tokText = token.termText(); + final Token reusableToken = new Token(); + for (Token nextToken = tf.next(reusableToken); nextToken != null; nextToken = tf.next(reusableToken)) { + String tokText = nextToken.term(); //System.out.println("Text: " + tokText + " Type: " + token.type()); - assertTrue("token is null and it shouldn't be", token != null); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); String expectedType = (String) tcm.get(tokText); - assertTrue("expectedType is null and it shouldn't be for: " + token, expectedType != null); - assertTrue(token.type() + " is not equal to " + expectedType + " for " + token, token.type().equals(expectedType) == true); + assertTrue("expectedType is null and it shouldn't be for: " + nextToken, expectedType != null); + 
assertTrue(nextToken.type() + " is not equal to " + expectedType + " for " + nextToken, nextToken.type().equals(expectedType) == true); count++; - if (token.type().equals(WikipediaTokenizer.ITALICS) == true){ + if (nextToken.type().equals(WikipediaTokenizer.ITALICS) == true){ numItalics++; - } else if (token.type().equals(WikipediaTokenizer.BOLD_ITALICS) == true){ + } else if (nextToken.type().equals(WikipediaTokenizer.BOLD_ITALICS) == true){ numBoldItalics++; - } else if (token.type().equals(WikipediaTokenizer.CATEGORY) == true){ + } else if (nextToken.type().equals(WikipediaTokenizer.CATEGORY) == true){ numCategory++; } - else if (token.type().equals(WikipediaTokenizer.CITATION) == true){ + else if (nextToken.type().equals(WikipediaTokenizer.CITATION) == true){ numCitation++; } } @@ -166,105 +166,105 @@ } private void checkLinkPhrases(WikipediaTokenizer tf) throws IOException { - Token token = new Token(); - token = tf.next(token); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "click", new String(token.termBuffer(), 0, token.termLength()).equals("click") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - token = tf.next(token); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "link", new String(token.termBuffer(), 0, token.termLength()).equals("link") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - token = tf.next(token); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "here", - new String(token.termBuffer(), 0, token.termLength()).equals("here") == true); + final Token reusableToken = new Token(); + Token 
nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "click", nextToken.term().equals("click") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "link", nextToken.term().equals("link") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "here", + nextToken.term().equals("here") == true); //The link, and here should be at the same position for phrases to work - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - token = tf.next(token); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "again", - new String(token.termBuffer(), 0, token.termLength()).equals("again") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - - token = tf.next(token); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "click", - new String(token.termBuffer(), 0, token.termLength()).equals("click") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - - token = tf.next(token); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + 
"http://lucene.apache.org", - new String(token.termBuffer(), 0, token.termLength()).equals("http://lucene.apache.org") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - - token = tf.next(token); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "here", - new String(token.termBuffer(), 0, token.termLength()).equals("here") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 0, token.getPositionIncrement() == 0); - - token = tf.next(token); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "again", - new String(token.termBuffer(), 0, token.termLength()).equals("again") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - - token = tf.next(token); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "a", - new String(token.termBuffer(), 0, token.termLength()).equals("a") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - - token = tf.next(token); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "b", - new String(token.termBuffer(), 0, token.termLength()).equals("b") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - - token = tf.next(token); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "c", - new String(token.termBuffer(), 0, token.termLength()).equals("c") 
== true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - - token = tf.next(token); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "d", - new String(token.termBuffer(), 0, token.termLength()).equals("d") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "again", + nextToken.term().equals("again") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "click", + nextToken.term().equals("click") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "http://lucene.apache.org", + nextToken.term().equals("http://lucene.apache.org") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "here", + nextToken.term().equals("here") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 0, nextToken.getPositionIncrement() == 0); + + nextToken = 
tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "again", + nextToken.term().equals("again") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "a", + nextToken.term().equals("a") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "b", + nextToken.term().equals("b") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "c", + nextToken.term().equals("c") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "d", + nextToken.term().equals("d") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); - token = tf.next(); - assertTrue("token is not null and it should be", token == null); + nextToken = tf.next(reusableToken); + assertTrue("nextToken is not null and it should be", nextToken == null); } public void testLinks() throws Exception { String test = "[http://lucene.apache.org/java/docs/index.html#news here] [http://lucene.apache.org/java/docs/index.html?b=c 
here] [https://lucene.apache.org/java/docs/index.html?b=c here]"; WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test)); - Token token = new Token(); - token = tf.next(token); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "http://lucene.apache.org/java/docs/index.html#news", - new String(token.termBuffer(), 0, token.termLength()).equals("http://lucene.apache.org/java/docs/index.html#news") == true); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, token.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true); - tf.next(token);//skip here - token = tf.next(token); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "http://lucene.apache.org/java/docs/index.html?b=c", - new String(token.termBuffer(), 0, token.termLength()).equals("http://lucene.apache.org/java/docs/index.html?b=c") == true); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, token.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true); - tf.next(token);//skip here - token = tf.next(token); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "https://lucene.apache.org/java/docs/index.html?b=c", - new String(token.termBuffer(), 0, token.termLength()).equals("https://lucene.apache.org/java/docs/index.html?b=c") == true); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, token.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true); - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); + final Token reusableToken = new Token(); + Token nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it 
shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "http://lucene.apache.org/java/docs/index.html#news", + nextToken.term().equals("http://lucene.apache.org/java/docs/index.html#news") == true); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, nextToken.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true); + tf.next(reusableToken);//skip here + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "http://lucene.apache.org/java/docs/index.html?b=c", + nextToken.term().equals("http://lucene.apache.org/java/docs/index.html?b=c") == true); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, nextToken.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true); + tf.next(reusableToken);//skip here + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "https://lucene.apache.org/java/docs/index.html?b=c", + nextToken.term().equals("https://lucene.apache.org/java/docs/index.html?b=c") == true); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, nextToken.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true); + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); - token = tf.next(); - assertTrue("token is not null and it should be", token == null); + nextToken = tf.next(reusableToken); + assertTrue("nextToken is not null and it should be", nextToken == null); } @@ -277,72 +277,72 @@ checkLinkPhrases(tf); String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h i j]]"; tf = new WikipediaTokenizer(new StringReader(test), 
WikipediaTokenizer.UNTOKENIZED_ONLY, untoks); - Token token; - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "a b c d", - new String(token.termBuffer(), 0, token.termLength()).equals("a b c d") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - assertTrue(token.startOffset() + " does not equal: " + 11, token.startOffset() == 11); - assertTrue(token.endOffset() + " does not equal: " + 18, token.endOffset() == 18); - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "e f g", - new String(token.termBuffer(), 0, token.termLength()).equals("e f g") == true); - assertTrue(token.startOffset() + " does not equal: " + 32, token.startOffset() == 32); - assertTrue(token.endOffset() + " does not equal: " + 37, token.endOffset() == 37); - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "link", - new String(token.termBuffer(), 0, token.termLength()).equals("link") == true); - assertTrue(token.startOffset() + " does not equal: " + 42, token.startOffset() == 42); - assertTrue(token.endOffset() + " does not equal: " + 46, token.endOffset() == 46); - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "here", - new String(token.termBuffer(), 0, token.termLength()).equals("here") == true); - assertTrue(token.startOffset() + " does not equal: " + 47, token.startOffset() == 47); - assertTrue(token.endOffset() + " does not equal: " + 51, token.endOffset() == 51); - token = tf.next(); - assertTrue("token is null and it 
shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "link", - new String(token.termBuffer(), 0, token.termLength()).equals("link") == true); - assertTrue(token.startOffset() + " does not equal: " + 56, token.startOffset() == 56); - assertTrue(token.endOffset() + " does not equal: " + 60, token.endOffset() == 60); - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "there", - new String(token.termBuffer(), 0, token.termLength()).equals("there") == true); - assertTrue(token.startOffset() + " does not equal: " + 61, token.startOffset() == 61); - assertTrue(token.endOffset() + " does not equal: " + 66, token.endOffset() == 66); - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "italics here", - new String(token.termBuffer(), 0, token.termLength()).equals("italics here") == true); - assertTrue(token.startOffset() + " does not equal: " + 71, token.startOffset() == 71); - assertTrue(token.endOffset() + " does not equal: " + 83, token.endOffset() == 83); - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "something", - new String(token.termBuffer(), 0, token.termLength()).equals("something") == true); - assertTrue(token.startOffset() + " does not equal: " + 86, token.startOffset() == 86); - assertTrue(token.endOffset() + " does not equal: " + 95, token.endOffset() == 95); - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "more italics", - new String(token.termBuffer(), 0, token.termLength()).equals("more 
italics") == true); - assertTrue(token.startOffset() + " does not equal: " + 98, token.startOffset() == 98); - assertTrue(token.endOffset() + " does not equal: " + 110, token.endOffset() == 110); - - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "h i j", - new String(token.termBuffer(), 0, token.termLength()).equals("h i j") == true); - assertTrue(token.startOffset() + " does not equal: " + 124, token.startOffset() == 124); - assertTrue(token.endOffset() + " does not equal: " + 133, token.endOffset() == 133); + final Token reusableToken = new Token(); + Token nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "a b c d", + nextToken.term().equals("a b c d") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + assertTrue(nextToken.startOffset() + " does not equal: " + 11, nextToken.startOffset() == 11); + assertTrue(nextToken.endOffset() + " does not equal: " + 18, nextToken.endOffset() == 18); + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "e f g", + nextToken.term().equals("e f g") == true); + assertTrue(nextToken.startOffset() + " does not equal: " + 32, nextToken.startOffset() == 32); + assertTrue(nextToken.endOffset() + " does not equal: " + 37, nextToken.endOffset() == 37); + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "link", + nextToken.term().equals("link") == true); + assertTrue(nextToken.startOffset() + " does not equal: " + 42, nextToken.startOffset() == 42); + assertTrue(nextToken.endOffset() + " does not 
equal: " + 46, nextToken.endOffset() == 46); + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "here", + nextToken.term().equals("here") == true); + assertTrue(nextToken.startOffset() + " does not equal: " + 47, nextToken.startOffset() == 47); + assertTrue(nextToken.endOffset() + " does not equal: " + 51, nextToken.endOffset() == 51); + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "link", + nextToken.term().equals("link") == true); + assertTrue(nextToken.startOffset() + " does not equal: " + 56, nextToken.startOffset() == 56); + assertTrue(nextToken.endOffset() + " does not equal: " + 60, nextToken.endOffset() == 60); + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "there", + nextToken.term().equals("there") == true); + assertTrue(nextToken.startOffset() + " does not equal: " + 61, nextToken.startOffset() == 61); + assertTrue(nextToken.endOffset() + " does not equal: " + 66, nextToken.endOffset() == 66); + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "italics here", + nextToken.term().equals("italics here") == true); + assertTrue(nextToken.startOffset() + " does not equal: " + 71, nextToken.startOffset() == 71); + assertTrue(nextToken.endOffset() + " does not equal: " + 83, nextToken.endOffset() == 83); + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "something", + nextToken.term().equals("something") == true); + assertTrue(nextToken.startOffset() + " does not equal: " + 86, 
nextToken.startOffset() == 86); + assertTrue(nextToken.endOffset() + " does not equal: " + 95, nextToken.endOffset() == 95); + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "more italics", + nextToken.term().equals("more italics") == true); + assertTrue(nextToken.startOffset() + " does not equal: " + 98, nextToken.startOffset() == 98); + assertTrue(nextToken.endOffset() + " does not equal: " + 110, nextToken.endOffset() == 110); + + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "h i j", + nextToken.term().equals("h i j") == true); + assertTrue(nextToken.startOffset() + " does not equal: " + 124, nextToken.startOffset() == 124); + assertTrue(nextToken.endOffset() + " does not equal: " + 133, nextToken.endOffset() == 133); - token = tf.next(); - assertTrue("token is not null and it should be", token == null); + nextToken = tf.next(reusableToken); + assertTrue("nextToken is not null and it should be", nextToken == null); } public void testBoth() throws Exception { @@ -352,225 +352,225 @@ String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h i j]]"; //should output all the indivual tokens plus the untokenized tokens as well. 
Untokenized tokens WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test), WikipediaTokenizer.BOTH, untoks); - Token token; - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "a b c d", - new String(token.termBuffer(), 0, token.termLength()).equals("a b c d") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, token.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(token.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, token.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG); - assertTrue(token.startOffset() + " does not equal: " + 11, token.startOffset() == 11); - assertTrue(token.endOffset() + " does not equal: " + 18, token.endOffset() == 18); - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "a", - new String(token.termBuffer(), 0, token.termLength()).equals("a") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 0, token.getPositionIncrement() == 0); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, token.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(token.getFlags() + " equals: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG + " and it shouldn't", token.getFlags() != WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG); - assertTrue(token.startOffset() + " does not equal: " + 11, token.startOffset() == 11); - assertTrue(token.endOffset() + " does not equal: " + 12, token.endOffset() == 12); - - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) 
+ " is not equal to " + "b", - new String(token.termBuffer(), 0, token.termLength()).equals("b") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, token.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(token.startOffset() + " does not equal: " + 13, token.startOffset() == 13); - assertTrue(token.endOffset() + " does not equal: " + 14, token.endOffset() == 14); - - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "c", - new String(token.termBuffer(), 0, token.termLength()).equals("c") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, token.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(token.startOffset() + " does not equal: " + 15, token.startOffset() == 15); - assertTrue(token.endOffset() + " does not equal: " + 16, token.endOffset() == 16); - - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "d", - new String(token.termBuffer(), 0, token.termLength()).equals("d") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, token.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(token.startOffset() + " does not equal: " + 17, token.startOffset() == 17); - assertTrue(token.endOffset() + " does not equal: " + 18, token.endOffset() == 18); - - - - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new 
String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "e f g", - new String(token.termBuffer(), 0, token.termLength()).equals("e f g") == true); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, token.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(token.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, token.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG); - assertTrue(token.startOffset() + " does not equal: " + 32, token.startOffset() == 32); - assertTrue(token.endOffset() + " does not equal: " + 37, token.endOffset() == 37); - - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "e", - new String(token.termBuffer(), 0, token.termLength()).equals("e") == true); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, token.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 0, token.getPositionIncrement() == 0); - assertTrue(token.startOffset() + " does not equal: " + 32, token.startOffset() == 32); - assertTrue(token.endOffset() + " does not equal: " + 33, token.endOffset() == 33); - - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "f", - new String(token.termBuffer(), 0, token.termLength()).equals("f") == true); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, token.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - assertTrue(token.startOffset() + " does not equal: " + 34, token.startOffset() == 34); - assertTrue(token.endOffset() + " does not equal: " + 35, token.endOffset() == 35); - 
- token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "g", - new String(token.termBuffer(), 0, token.termLength()).equals("g") == true); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, token.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - assertTrue(token.startOffset() + " does not equal: " + 36, token.startOffset() == 36); - assertTrue(token.endOffset() + " does not equal: " + 37, token.endOffset() == 37); - - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "link", - new String(token.termBuffer(), 0, token.termLength()).equals("link") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, token.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true); - assertTrue(token.startOffset() + " does not equal: " + 42, token.startOffset() == 42); - assertTrue(token.endOffset() + " does not equal: " + 46, token.endOffset() == 46); - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "here", - new String(token.termBuffer(), 0, token.termLength()).equals("here") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, token.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true); - assertTrue(token.startOffset() + " does not equal: " + 47, token.startOffset() == 47); - 
assertTrue(token.endOffset() + " does not equal: " + 51, token.endOffset() == 51); - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "link", - new String(token.termBuffer(), 0, token.termLength()).equals("link") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - assertTrue(token.startOffset() + " does not equal: " + 56, token.startOffset() == 56); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, token.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true); - assertTrue(token.endOffset() + " does not equal: " + 60, token.endOffset() == 60); - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "there", - new String(token.termBuffer(), 0, token.termLength()).equals("there") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, token.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true); - assertTrue(token.startOffset() + " does not equal: " + 61, token.startOffset() == 61); - assertTrue(token.endOffset() + " does not equal: " + 66, token.endOffset() == 66); - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "italics here", - new String(token.termBuffer(), 0, token.termLength()).equals("italics here") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.ITALICS, token.type().equals(WikipediaTokenizer.ITALICS) == true); - 
assertTrue(token.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, token.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG); - assertTrue(token.startOffset() + " does not equal: " + 71, token.startOffset() == 71); - assertTrue(token.endOffset() + " does not equal: " + 83, token.endOffset() == 83); - - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "italics", - new String(token.termBuffer(), 0, token.termLength()).equals("italics") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 0, token.getPositionIncrement() == 0); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.ITALICS, token.type().equals(WikipediaTokenizer.ITALICS) == true); - assertTrue(token.startOffset() + " does not equal: " + 71, token.startOffset() == 71); - assertTrue(token.endOffset() + " does not equal: " + 78, token.endOffset() == 78); - - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "here", - new String(token.termBuffer(), 0, token.termLength()).equals("here") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.ITALICS, token.type().equals(WikipediaTokenizer.ITALICS) == true); - assertTrue(token.startOffset() + " does not equal: " + 79, token.startOffset() == 79); - assertTrue(token.endOffset() + " does not equal: " + 83, token.endOffset() == 83); - - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "something", - new String(token.termBuffer(), 0, token.termLength()).equals("something") == true); - 
assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - assertTrue(token.startOffset() + " does not equal: " + 86, token.startOffset() == 86); - assertTrue(token.endOffset() + " does not equal: " + 95, token.endOffset() == 95); - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "more italics", - new String(token.termBuffer(), 0, token.termLength()).equals("more italics") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.ITALICS, token.type().equals(WikipediaTokenizer.ITALICS) == true); - assertTrue(token.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, token.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG); - assertTrue(token.startOffset() + " does not equal: " + 98, token.startOffset() == 98); - assertTrue(token.endOffset() + " does not equal: " + 110, token.endOffset() == 110); - - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "more", - new String(token.termBuffer(), 0, token.termLength()).equals("more") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 0, token.getPositionIncrement() == 0); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.ITALICS, token.type().equals(WikipediaTokenizer.ITALICS) == true); - assertTrue(token.startOffset() + " does not equal: " + 98, token.startOffset() == 98); - assertTrue(token.endOffset() + " does not equal: " + 102, token.endOffset() == 102); - - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to 
" + "italics", - new String(token.termBuffer(), 0, token.termLength()).equals("italics") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.ITALICS, token.type().equals(WikipediaTokenizer.ITALICS) == true); - - assertTrue(token.startOffset() + " does not equal: " + 103, token.startOffset() == 103); - assertTrue(token.endOffset() + " does not equal: " + 110, token.endOffset() == 110); - - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "h i j", - new String(token.termBuffer(), 0, token.termLength()).equals("h i j") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, token.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(token.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, token.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG); - assertTrue(token.startOffset() + " does not equal: " + 124, token.startOffset() == 124); - assertTrue(token.endOffset() + " does not equal: " + 133, token.endOffset() == 133); - - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "h", - new String(token.termBuffer(), 0, token.termLength()).equals("h") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 0, token.getPositionIncrement() == 0); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, token.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(token.startOffset() + " does not equal: " + 124, token.startOffset() == 124); - 
assertTrue(token.endOffset() + " does not equal: " + 125, token.endOffset() == 125); - - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "i", - new String(token.termBuffer(), 0, token.termLength()).equals("i") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, token.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(token.startOffset() + " does not equal: " + 128, token.startOffset() == 128); - assertTrue(token.endOffset() + " does not equal: " + 129, token.endOffset() == 129); - token = tf.next(); - assertTrue("token is null and it shouldn't be", token != null); - assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "j", - new String(token.termBuffer(), 0, token.termLength()).equals("j") == true); - assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1); - assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, token.type().equals(WikipediaTokenizer.CATEGORY) == true); - assertTrue(token.startOffset() + " does not equal: " + 132, token.startOffset() == 132); - assertTrue(token.endOffset() + " does not equal: " + 133, token.endOffset() == 133); + final Token reusableToken = new Token(); + Token nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "a b c d", + nextToken.term().equals("a b c d") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, nextToken.type().equals(WikipediaTokenizer.CATEGORY) == true); + 
assertTrue(nextToken.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, nextToken.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG); + assertTrue(nextToken.startOffset() + " does not equal: " + 11, nextToken.startOffset() == 11); + assertTrue(nextToken.endOffset() + " does not equal: " + 18, nextToken.endOffset() == 18); + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "a", + nextToken.term().equals("a") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 0, nextToken.getPositionIncrement() == 0); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, nextToken.type().equals(WikipediaTokenizer.CATEGORY) == true); + assertTrue(nextToken.getFlags() + " equals: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG + " and it shouldn't", nextToken.getFlags() != WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG); + assertTrue(nextToken.startOffset() + " does not equal: " + 11, nextToken.startOffset() == 11); + assertTrue(nextToken.endOffset() + " does not equal: " + 12, nextToken.endOffset() == 12); + + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "b", + nextToken.term().equals("b") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, nextToken.type().equals(WikipediaTokenizer.CATEGORY) == true); + assertTrue(nextToken.startOffset() + " does not equal: " + 13, nextToken.startOffset() == 13); + assertTrue(nextToken.endOffset() + " does not equal: " + 14, nextToken.endOffset() == 14); + + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + 
assertTrue(nextToken.term() + " is not equal to " + "c", + nextToken.term().equals("c") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, nextToken.type().equals(WikipediaTokenizer.CATEGORY) == true); + assertTrue(nextToken.startOffset() + " does not equal: " + 15, nextToken.startOffset() == 15); + assertTrue(nextToken.endOffset() + " does not equal: " + 16, nextToken.endOffset() == 16); + + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "d", + nextToken.term().equals("d") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, nextToken.type().equals(WikipediaTokenizer.CATEGORY) == true); + assertTrue(nextToken.startOffset() + " does not equal: " + 17, nextToken.startOffset() == 17); + assertTrue(nextToken.endOffset() + " does not equal: " + 18, nextToken.endOffset() == 18); + + + + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "e f g", + nextToken.term().equals("e f g") == true); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, nextToken.type().equals(WikipediaTokenizer.CATEGORY) == true); + assertTrue(nextToken.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, nextToken.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG); + assertTrue(nextToken.startOffset() + " does not equal: " + 32, nextToken.startOffset() == 32); + assertTrue(nextToken.endOffset() + " does not equal: " + 37, nextToken.endOffset() == 37); + + nextToken = tf.next(reusableToken); + 
assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "e", + nextToken.term().equals("e") == true); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, nextToken.type().equals(WikipediaTokenizer.CATEGORY) == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 0, nextToken.getPositionIncrement() == 0); + assertTrue(nextToken.startOffset() + " does not equal: " + 32, nextToken.startOffset() == 32); + assertTrue(nextToken.endOffset() + " does not equal: " + 33, nextToken.endOffset() == 33); + + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "f", + nextToken.term().equals("f") == true); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, nextToken.type().equals(WikipediaTokenizer.CATEGORY) == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + assertTrue(nextToken.startOffset() + " does not equal: " + 34, nextToken.startOffset() == 34); + assertTrue(nextToken.endOffset() + " does not equal: " + 35, nextToken.endOffset() == 35); + + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "g", + nextToken.term().equals("g") == true); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, nextToken.type().equals(WikipediaTokenizer.CATEGORY) == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + assertTrue(nextToken.startOffset() + " does not equal: " + 36, nextToken.startOffset() == 36); + assertTrue(nextToken.endOffset() + " does not equal: " + 37, nextToken.endOffset() == 37); + + nextToken = tf.next(reusableToken); + 
assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "link", + nextToken.term().equals("link") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, nextToken.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true); + assertTrue(nextToken.startOffset() + " does not equal: " + 42, nextToken.startOffset() == 42); + assertTrue(nextToken.endOffset() + " does not equal: " + 46, nextToken.endOffset() == 46); + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "here", + nextToken.term().equals("here") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, nextToken.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true); + assertTrue(nextToken.startOffset() + " does not equal: " + 47, nextToken.startOffset() == 47); + assertTrue(nextToken.endOffset() + " does not equal: " + 51, nextToken.endOffset() == 51); + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "link", + nextToken.term().equals("link") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + assertTrue(nextToken.startOffset() + " does not equal: " + 56, nextToken.startOffset() == 56); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, nextToken.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true); + assertTrue(nextToken.endOffset() + " does not equal: " + 60, nextToken.endOffset() == 60); + 
nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "there", + nextToken.term().equals("there") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, nextToken.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true); + assertTrue(nextToken.startOffset() + " does not equal: " + 61, nextToken.startOffset() == 61); + assertTrue(nextToken.endOffset() + " does not equal: " + 66, nextToken.endOffset() == 66); + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "italics here", + nextToken.term().equals("italics here") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.ITALICS, nextToken.type().equals(WikipediaTokenizer.ITALICS) == true); + assertTrue(nextToken.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, nextToken.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG); + assertTrue(nextToken.startOffset() + " does not equal: " + 71, nextToken.startOffset() == 71); + assertTrue(nextToken.endOffset() + " does not equal: " + 83, nextToken.endOffset() == 83); + + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "italics", + nextToken.term().equals("italics") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 0, nextToken.getPositionIncrement() == 0); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.ITALICS, nextToken.type().equals(WikipediaTokenizer.ITALICS) 
== true); + assertTrue(nextToken.startOffset() + " does not equal: " + 71, nextToken.startOffset() == 71); + assertTrue(nextToken.endOffset() + " does not equal: " + 78, nextToken.endOffset() == 78); + + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "here", + nextToken.term().equals("here") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.ITALICS, nextToken.type().equals(WikipediaTokenizer.ITALICS) == true); + assertTrue(nextToken.startOffset() + " does not equal: " + 79, nextToken.startOffset() == 79); + assertTrue(nextToken.endOffset() + " does not equal: " + 83, nextToken.endOffset() == 83); + + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "something", + nextToken.term().equals("something") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + assertTrue(nextToken.startOffset() + " does not equal: " + 86, nextToken.startOffset() == 86); + assertTrue(nextToken.endOffset() + " does not equal: " + 95, nextToken.endOffset() == 95); + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "more italics", + nextToken.term().equals("more italics") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.ITALICS, nextToken.type().equals(WikipediaTokenizer.ITALICS) == true); + assertTrue(nextToken.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, 
nextToken.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG); + assertTrue(nextToken.startOffset() + " does not equal: " + 98, nextToken.startOffset() == 98); + assertTrue(nextToken.endOffset() + " does not equal: " + 110, nextToken.endOffset() == 110); + + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "more", + nextToken.term().equals("more") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 0, nextToken.getPositionIncrement() == 0); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.ITALICS, nextToken.type().equals(WikipediaTokenizer.ITALICS) == true); + assertTrue(nextToken.startOffset() + " does not equal: " + 98, nextToken.startOffset() == 98); + assertTrue(nextToken.endOffset() + " does not equal: " + 102, nextToken.endOffset() == 102); + + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "italics", + nextToken.term().equals("italics") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.ITALICS, nextToken.type().equals(WikipediaTokenizer.ITALICS) == true); + + assertTrue(nextToken.startOffset() + " does not equal: " + 103, nextToken.startOffset() == 103); + assertTrue(nextToken.endOffset() + " does not equal: " + 110, nextToken.endOffset() == 110); + + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "h i j", + nextToken.term().equals("h i j") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + assertTrue(nextToken.type() + " is not equal to " + 
WikipediaTokenizer.CATEGORY, nextToken.type().equals(WikipediaTokenizer.CATEGORY) == true); + assertTrue(nextToken.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, nextToken.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG); + assertTrue(nextToken.startOffset() + " does not equal: " + 124, nextToken.startOffset() == 124); + assertTrue(nextToken.endOffset() + " does not equal: " + 133, nextToken.endOffset() == 133); + + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "h", + nextToken.term().equals("h") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 0, nextToken.getPositionIncrement() == 0); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, nextToken.type().equals(WikipediaTokenizer.CATEGORY) == true); + assertTrue(nextToken.startOffset() + " does not equal: " + 124, nextToken.startOffset() == 124); + assertTrue(nextToken.endOffset() + " does not equal: " + 125, nextToken.endOffset() == 125); + + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "i", + nextToken.term().equals("i") == true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, nextToken.type().equals(WikipediaTokenizer.CATEGORY) == true); + assertTrue(nextToken.startOffset() + " does not equal: " + 128, nextToken.startOffset() == 128); + assertTrue(nextToken.endOffset() + " does not equal: " + 129, nextToken.endOffset() == 129); + nextToken = tf.next(reusableToken); + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + assertTrue(nextToken.term() + " is not equal to " + "j", + nextToken.term().equals("j") == 
true); + assertTrue(nextToken.getPositionIncrement() + " does not equal: " + 1, nextToken.getPositionIncrement() == 1); + assertTrue(nextToken.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, nextToken.type().equals(WikipediaTokenizer.CATEGORY) == true); + assertTrue(nextToken.startOffset() + " does not equal: " + 132, nextToken.startOffset() == 132); + assertTrue(nextToken.endOffset() + " does not equal: " + 133, nextToken.endOffset() == 133); - token = tf.next(); - assertTrue("token is not null and it should be", token == null); + nextToken = tf.next(reusableToken); + assertTrue("nextToken is not null and it should be", nextToken == null); } } Modified: lucene/java/trunk/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java (original) +++ lucene/java/trunk/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java Wed Aug 20 07:38:07 2008 @@ -17,14 +17,28 @@ * limitations under the License. 
*/ -import org.apache.lucene.store.*; -import org.apache.lucene.search.*; -import org.apache.lucene.index.*; -import org.apache.lucene.document.*; -import org.apache.lucene.analysis.*; -import org.apache.lucene.analysis.standard.*; -import java.io.*; -import java.util.*; +import java.io.IOException; +import java.io.StringReader; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Hits; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Searcher; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.FSDirectory; /** @@ -99,10 +113,10 @@ // [1] Parse query into separate words so that when we expand we can avoid dups TokenStream ts = a.tokenStream( field, new StringReader( query)); - org.apache.lucene.analysis.Token t; - while ( (t = ts.next()) != null) - { - String word = t.termText(); + + final Token reusableToken = new Token(); + for (Token nextToken = ts.next(reusableToken); nextToken != null; nextToken = ts.next(reusableToken)) { + String word = nextToken.term(); if ( already.add( word)) top.add( word); } Modified: lucene/java/trunk/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- 
lucene/java/trunk/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java (original) +++ lucene/java/trunk/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java Wed Aug 20 07:38:07 2008 @@ -17,13 +17,27 @@ * limitations under the License. */ -import org.apache.lucene.store.*; -import org.apache.lucene.search.*; -import org.apache.lucene.index.*; -import org.apache.lucene.document.*; -import org.apache.lucene.analysis.*; -import java.io.*; -import java.util.*; +import java.io.IOException; +import java.io.StringReader; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Hits; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Searcher; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.FSDirectory; /** @@ -86,10 +100,9 @@ // [1] Parse query into separate words so that when we expand we can avoid dups TokenStream ts = a.tokenStream( field, new StringReader( query)); - org.apache.lucene.analysis.Token t; - while ( (t = ts.next()) != null) - { - String word = t.termText(); + final Token reusableToken = new Token(); + for (Token nextToken = ts.next(reusableToken); nextToken != null; nextToken = ts.next(reusableToken)) { + String word = nextToken.term(); if ( already.add( word)) top.add( word); } Modified: lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/LikeThisQueryBuilder.java URL: 
http://svn.apache.org/viewvc/lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/LikeThisQueryBuilder.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/LikeThisQueryBuilder.java (original) +++ lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/LikeThisQueryBuilder.java Wed Aug 20 07:38:07 2008 @@ -74,16 +74,14 @@ if((stopWords!=null)&&(fields!=null)) { stopWordsSet=new HashSet(); + final Token reusableToken = new Token(); for (int i = 0; i < fields.length; i++) { TokenStream ts = analyzer.tokenStream(fields[i],new StringReader(stopWords)); try { - Token stopToken=ts.next(); - while(stopToken!=null) - { - stopWordsSet.add(stopToken.termText()); - stopToken=ts.next(); + for (Token nextToken = ts.next(reusableToken); nextToken != null; nextToken = ts.next(reusableToken)) { + stopWordsSet.add(nextToken.term()); } } catch(IOException ioe) Modified: lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java (original) +++ lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java Wed Aug 20 07:38:07 2008 @@ -52,12 +52,10 @@ { ArrayList clausesList=new ArrayList(); TokenStream ts=analyzer.tokenStream(fieldName,new StringReader(value)); - Token token=ts.next(); - while(token!=null) - { - SpanTermQuery stq=new SpanTermQuery(new 
Term(fieldName,token.termText())); + final Token reusableToken = new Token(); + for (Token nextToken = ts.next(reusableToken); nextToken != null; nextToken = ts.next(reusableToken)) { + SpanTermQuery stq=new SpanTermQuery(new Term(fieldName,nextToken.term())); clausesList.add(stq); - token=ts.next(); } SpanOrQuery soq=new SpanOrQuery((SpanQuery[]) clausesList.toArray(new SpanQuery[clausesList.size()])); soq.setBoost(DOMUtils.getAttribute(e,"boost",1.0f)); Modified: lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java (original) +++ lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java Wed Aug 20 07:38:07 2008 @@ -59,20 +59,18 @@ try { - Token token = ts.next(); + final Token reusableToken = new Token(); Term term = null; - while (token != null) - { + for (Token nextToken = ts.next(reusableToken); nextToken != null; nextToken = ts.next(reusableToken)) { if (term == null) { - term = new Term(fieldName, token.termText()); + term = new Term(fieldName, nextToken.term()); } else { // create from previous to save fieldName.intern overhead - term = term.createTerm(token.termText()); + term = term.createTerm(nextToken.term()); } tf.addTerm(term); - token = ts.next(); } } catch (IOException ioe) Modified: lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java URL: 
http://svn.apache.org/viewvc/lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java (original) +++ lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java Wed Aug 20 07:38:07 2008 @@ -58,20 +58,18 @@ TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text)); try { - Token token = ts.next(); + final Token reusableToken = new Token(); Term term = null; - while (token != null) - { + for (Token nextToken = ts.next(reusableToken); nextToken != null; nextToken = ts.next(reusableToken)) { if (term == null) { - term = new Term(fieldName, token.termText()); + term = new Term(fieldName, nextToken.term()); } else { // create from previous to save fieldName.intern overhead - term = term.createTerm(token.termText()); + term = term.createTerm(nextToken.term()); } bq.add(new BooleanClause(new TermQuery(term),BooleanClause.Occur.SHOULD)); - token = ts.next(); } } catch (IOException ioe) Modified: lucene/java/trunk/src/demo/org/apache/lucene/demo/html/HTMLParser.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/demo/org/apache/lucene/demo/html/HTMLParser.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/src/demo/org/apache/lucene/demo/html/HTMLParser.java (original) +++ lucene/java/trunk/src/demo/org/apache/lucene/demo/html/HTMLParser.java Wed Aug 20 07:38:07 2008 @@ -487,7 +487,10 @@ private int jj_gc = 0; public HTMLParser(java.io.InputStream stream) { - jj_input_stream = new SimpleCharStream(stream, 1, 1); + this(stream, null); + } + public HTMLParser(java.io.InputStream stream, String encoding) { + try { 
jj_input_stream = new SimpleCharStream(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); } token_source = new HTMLParserTokenManager(jj_input_stream); token = new Token(); jj_ntk = -1; @@ -497,7 +500,10 @@ } public void ReInit(java.io.InputStream stream) { - jj_input_stream.ReInit(stream, 1, 1); + ReInit(stream, null); + } + public void ReInit(java.io.InputStream stream, String encoding) { + try { jj_input_stream.ReInit(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); } token_source.ReInit(jj_input_stream); token = new Token(); jj_ntk = -1; @@ -627,7 +633,9 @@ jj_lasttokens[jj_endpos++] = kind; } else if (jj_endpos != 0) { jj_expentry = new int[jj_endpos]; - System.arraycopy(jj_lasttokens, 0, jj_expentry, 0, jj_endpos); + for (int i = 0; i < jj_endpos; i++) { + jj_expentry[i] = jj_lasttokens[i]; + } boolean exists = false; for (java.util.Enumeration e = jj_expentries.elements(); e.hasMoreElements();) { int[] oldentry = (int[])(e.nextElement()); @@ -692,6 +700,7 @@ final private void jj_rescan_token() { jj_rescan = true; for (int i = 0; i < 2; i++) { + try { JJCalls p = jj_2_rtns[i]; do { if (p.gen > jj_gen) { @@ -703,6 +712,7 @@ } p = p.next; } while (p != null); + } catch(LookaheadSuccess ls) { } } jj_rescan = false; } Modified: lucene/java/trunk/src/demo/org/apache/lucene/demo/html/HTMLParserTokenManager.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/demo/org/apache/lucene/demo/html/HTMLParserTokenManager.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/src/demo/org/apache/lucene/demo/html/HTMLParserTokenManager.java (original) +++ lucene/java/trunk/src/demo/org/apache/lucene/demo/html/HTMLParserTokenManager.java Wed Aug 20 07:38:07 2008 @@ -1457,14 +1457,12 @@ private final int[] jjrounds = new int[28]; private final int[] jjstateSet = new 
int[56]; protected char curChar; -public HTMLParserTokenManager(SimpleCharStream stream) -{ +public HTMLParserTokenManager(SimpleCharStream stream){ if (SimpleCharStream.staticFlag) throw new Error("ERROR: Cannot use a static CharStream class with a non-static lexical analyzer."); input_stream = stream; } -public HTMLParserTokenManager(SimpleCharStream stream, int lexState) -{ +public HTMLParserTokenManager(SimpleCharStream stream, int lexState){ this(stream); SwitchTo(lexState); } Modified: lucene/java/trunk/src/demo/org/apache/lucene/demo/html/ParseException.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/demo/org/apache/lucene/demo/html/ParseException.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/src/demo/org/apache/lucene/demo/html/ParseException.java (original) +++ lucene/java/trunk/src/demo/org/apache/lucene/demo/html/ParseException.java Wed Aug 20 07:38:07 2008 @@ -98,19 +98,19 @@ if (!specialConstructor) { return super.getMessage(); } - String expected = ""; + StringBuffer expected = new StringBuffer(); int maxSize = 0; for (int i = 0; i < expectedTokenSequences.length; i++) { if (maxSize < expectedTokenSequences[i].length) { maxSize = expectedTokenSequences[i].length; } for (int j = 0; j < expectedTokenSequences[i].length; j++) { - expected += tokenImage[expectedTokenSequences[i][j]] + " "; + expected.append(tokenImage[expectedTokenSequences[i][j]]).append(" "); } if (expectedTokenSequences[i][expectedTokenSequences[i].length - 1] != 0) { - expected += "..."; + expected.append("..."); } - expected += eol + " "; + expected.append(eol).append(" "); } String retval = "Encountered \""; Token tok = currentToken.next; @@ -130,7 +130,7 @@ } else { retval += "Was expecting one of:" + eol + " "; } - retval += expected; + retval += expected.toString(); return retval; } @@ -179,7 +179,7 @@ default: if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) { String s 
= "0000" + Integer.toString(ch, 16); - retval.append("\\u").append(s.substring(s.length() - 4, s.length())); + retval.append("\\u" + s.substring(s.length() - 4, s.length())); } else { retval.append(ch); } Modified: lucene/java/trunk/src/demo/org/apache/lucene/demo/html/SimpleCharStream.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/demo/org/apache/lucene/demo/html/SimpleCharStream.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/src/demo/org/apache/lucene/demo/html/SimpleCharStream.java (original) +++ lucene/java/trunk/src/demo/org/apache/lucene/demo/html/SimpleCharStream.java Wed Aug 20 07:38:07 2008 @@ -1,4 +1,4 @@ -/* Generated By:JavaCC: Do not edit this line. SimpleCharStream.java Version 3.0 */ +/* Generated By:JavaCC: Do not edit this line. SimpleCharStream.java Version 4.0 */ package org.apache.lucene.demo.html; /** @@ -27,6 +27,11 @@ protected char[] buffer; protected int maxNextCharInd = 0; protected int inBuf = 0; + protected int tabSize = 8; + + protected void setTabSize(int i) { tabSize = i; } + protected int getTabSize(int i) { return tabSize; } + protected void ExpandBuff(boolean wrapAround) { @@ -162,7 +167,7 @@ break; case '\t' : column--; - column += (8 - (column & 07)); + column += (tabSize - (column % tabSize)); break; default : break; @@ -248,7 +253,7 @@ } public SimpleCharStream(java.io.Reader dstream, int startline, - int startcolumn) + int startcolumn) { this(dstream, startline, startcolumn, 4096); } @@ -277,7 +282,7 @@ } public void ReInit(java.io.Reader dstream, int startline, - int startcolumn) + int startcolumn) { ReInit(dstream, startline, startcolumn, 4096); } @@ -286,35 +291,68 @@ { ReInit(dstream, 1, 1, 4096); } + public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline, + int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException + { + this(encoding == null ? 
new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize); + } + public SimpleCharStream(java.io.InputStream dstream, int startline, int startcolumn, int buffersize) { - this(new java.io.InputStreamReader(dstream), startline, startcolumn, 4096); + this(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize); + } + + public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline, + int startcolumn) throws java.io.UnsupportedEncodingException + { + this(dstream, encoding, startline, startcolumn, 4096); } public SimpleCharStream(java.io.InputStream dstream, int startline, - int startcolumn) + int startcolumn) { this(dstream, startline, startcolumn, 4096); } + public SimpleCharStream(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException + { + this(dstream, encoding, 1, 1, 4096); + } + public SimpleCharStream(java.io.InputStream dstream) { this(dstream, 1, 1, 4096); } + public void ReInit(java.io.InputStream dstream, String encoding, int startline, + int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException + { + ReInit(encoding == null ? 
new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize); + } + public void ReInit(java.io.InputStream dstream, int startline, int startcolumn, int buffersize) { - ReInit(new java.io.InputStreamReader(dstream), startline, startcolumn, 4096); + ReInit(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize); + } + + public void ReInit(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException + { + ReInit(dstream, encoding, 1, 1, 4096); } public void ReInit(java.io.InputStream dstream) { ReInit(dstream, 1, 1, 4096); } + public void ReInit(java.io.InputStream dstream, String encoding, int startline, + int startcolumn) throws java.io.UnsupportedEncodingException + { + ReInit(dstream, encoding, startline, startcolumn, 4096); + } public void ReInit(java.io.InputStream dstream, int startline, - int startcolumn) + int startcolumn) { ReInit(dstream, startline, startcolumn, 4096); } Modified: lucene/java/trunk/src/demo/org/apache/lucene/demo/html/TokenMgrError.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/demo/org/apache/lucene/demo/html/TokenMgrError.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/src/demo/org/apache/lucene/demo/html/TokenMgrError.java (original) +++ lucene/java/trunk/src/demo/org/apache/lucene/demo/html/TokenMgrError.java Wed Aug 20 07:38:07 2008 @@ -72,7 +72,7 @@ default: if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) { String s = "0000" + Integer.toString(ch, 16); - retval.append("\\u").append(s.substring(s.length() - 4, s.length())); + retval.append("\\u" + s.substring(s.length() - 4, s.length())); } else { retval.append(ch); } Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/CachingTokenFilter.java URL: 
http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/CachingTokenFilter.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/src/java/org/apache/lucene/analysis/CachingTokenFilter.java (original) +++ lucene/java/trunk/src/java/org/apache/lucene/analysis/CachingTokenFilter.java Wed Aug 20 07:38:07 2008 @@ -40,11 +40,12 @@ super(input); } - public Token next() throws IOException { + public Token next(final Token reusableToken) throws IOException { + assert reusableToken != null; if (cache == null) { // fill cache lazily cache = new LinkedList(); - fillCache(); + fillCache(reusableToken); iterator = cache.iterator(); } @@ -52,8 +53,9 @@ // the cache is exhausted, return null return null; } - - return (Token) iterator.next(); + // Since the TokenFilter can be reset, the tokens need to be preserved as immutable. + Token nextToken = (Token) iterator.next(); + return (Token) nextToken.clone(); } public void reset() throws IOException { @@ -62,10 +64,9 @@ } } - private void fillCache() throws IOException { - Token token; - while ( (token = input.next()) != null) { - cache.add(token); + private void fillCache(final Token reusableToken) throws IOException { + for (Token nextToken = input.next(reusableToken); nextToken != null; nextToken = input.next(reusableToken)) { + cache.add(nextToken.clone()); } } Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/CharTokenizer.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/CharTokenizer.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/src/java/org/apache/lucene/analysis/CharTokenizer.java (original) +++ lucene/java/trunk/src/java/org/apache/lucene/analysis/CharTokenizer.java Wed Aug 20 07:38:07 2008 @@ -44,11 +44,12 @@ return c; } - public final Token 
next(Token token) throws IOException { - token.clear(); + public final Token next(final Token reusableToken) throws IOException { + assert reusableToken != null; + reusableToken.clear(); int length = 0; int start = bufferIndex; - char[] buffer = token.termBuffer(); + char[] buffer = reusableToken.termBuffer(); while (true) { if (bufferIndex >= dataLen) { @@ -70,7 +71,7 @@ if (length == 0) // start of token start = offset + bufferIndex - 1; else if (length == buffer.length) - buffer = token.resizeTermBuffer(1+length); + buffer = reusableToken.resizeTermBuffer(1+length); buffer[length++] = normalize(c); // buffer it, normalized @@ -81,10 +82,10 @@ break; // return 'em } - token.termLength = length; - token.startOffset = start; - token.endOffset = start+length; - return token; + reusableToken.setTermLength(length); + reusableToken.setStartOffset(start); + reusableToken.setEndOffset(start+length); + return reusableToken; } public void reset(Reader input) throws IOException { Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/ISOLatin1AccentFilter.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/ISOLatin1AccentFilter.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/src/java/org/apache/lucene/analysis/ISOLatin1AccentFilter.java (original) +++ lucene/java/trunk/src/java/org/apache/lucene/analysis/ISOLatin1AccentFilter.java Wed Aug 20 07:38:07 2008 @@ -32,22 +32,23 @@ private char[] output = new char[256]; private int outputPos; - public final Token next(Token result) throws java.io.IOException { - result = input.next(result); - if (result != null) { - final char[] buffer = result.termBuffer(); - final int length = result.termLength(); + public final Token next(final Token reusableToken) throws java.io.IOException { + assert reusableToken != null; + Token nextToken = input.next(reusableToken); + if (nextToken != null) { 
+ final char[] buffer = nextToken.termBuffer(); + final int length = nextToken.termLength(); // If no characters actually require rewriting then we // just return token as-is: for(int i=0;i<length;i++) { final char c = buffer[i]; if (c >= '\u00c0' && c <= '\uFB06') { removeAccents(buffer, length); - result.setTermBuffer(output, 0, outputPos); + nextToken.setTermBuffer(output, 0, outputPos); break; } } - return result; + return nextToken; } else return null; }