Subject: svn commit: r687357 [2/6] - in /lucene/java/trunk: ./ contrib/analyzers/src/java/org/apache/lucene/analysis/br/ contrib/analyzers/src/java/org/apache/lucene/analysis/cjk/ contrib/analyzers/src/java/org/apache/lucene/analysis/cn/ contrib/analyzers/src/j...
Date: Wed, 20 Aug 2008 14:38:11 -0000
To: java-commits@lucene.apache.org
From: mikemccand@apache.org
Reply-To: java-dev@lucene.apache.org
Message-Id: <20080820143817.41CB12388A01@eris.apache.org>

Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java Wed Aug 20 07:38:07 2008
@@ -68,52 +68,46 @@
   public void testFrontUnigram() throws Exception {
     EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 1, 1);
-    Token token = null;
-    token = tokenizer.next();
-    assertEquals("(a,0,1)", token.toString());
-    token = tokenizer.next();
-    assertNull(token);
+    final Token reusableToken = new Token();
+    Token nextToken = tokenizer.next(reusableToken);
+    assertEquals("(a,0,1)", nextToken.toString());
+    assertNull(tokenizer.next(reusableToken));
   }

   public void testBackUnigram() throws Exception {
     EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.BACK, 1, 1);
-    Token token = null;
-    token = tokenizer.next();
-    assertEquals("(e,4,5)", token.toString());
-    token = tokenizer.next();
-    assertNull(token);
+    final Token reusableToken = new Token();
+    Token nextToken = tokenizer.next(reusableToken);
+    assertEquals("(e,4,5)", nextToken.toString());
+    assertNull(tokenizer.next(reusableToken));
   }
   public void testOversizedNgrams() throws Exception {
     EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 6, 6);
-    Token token = null;
-    token = tokenizer.next();
-    assertNull(token);
+    assertNull(tokenizer.next(new Token()));
   }

   public void testFrontRangeOfNgrams() throws Exception {
     EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 1, 3);
-    Token token = null;
-    token = tokenizer.next();
-    assertEquals("(a,0,1)", token.toString());
-    token = tokenizer.next();
-    assertEquals("(ab,0,2)", token.toString());
-    token = tokenizer.next();
-    assertEquals("(abc,0,3)", token.toString());
-    token = tokenizer.next();
-    assertNull(token);
+    final Token reusableToken = new Token();
+    Token nextToken = tokenizer.next(reusableToken);
+    assertEquals("(a,0,1)", nextToken.toString());
+    nextToken = tokenizer.next(reusableToken);
+    assertEquals("(ab,0,2)", nextToken.toString());
+    nextToken = tokenizer.next(reusableToken);
+    assertEquals("(abc,0,3)", nextToken.toString());
+    assertNull(tokenizer.next(reusableToken));
   }

   public void testBackRangeOfNgrams() throws Exception {
     EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.BACK, 1, 3);
-    Token token = null;
-    token = tokenizer.next();
-    assertEquals("(e,4,5)", token.toString());
-    token = tokenizer.next();
-    assertEquals("(de,3,5)", token.toString());
-    token = tokenizer.next();
-    assertEquals("(cde,2,5)", token.toString());
-    token = tokenizer.next();
-    assertNull(token);
+    final Token reusableToken = new Token();
+    Token nextToken = tokenizer.next(reusableToken);
+    assertEquals("(e,4,5)", nextToken.toString());
+    nextToken = tokenizer.next(reusableToken);
+    assertEquals("(de,3,5)", nextToken.toString());
+    nextToken = tokenizer.next(reusableToken);
+    assertEquals("(cde,2,5)", nextToken.toString());
+    assertNull(tokenizer.next(reusableToken));
   }
 }
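The pattern every hunk in this commit applies, shown in isolation: the caller allocates a single Token once and passes it to next(Token) on each call, then works only with the returned token. A minimal sketch of the consuming side against the 2.4-era analysis API, using only classes that appear in this commit (the class name here is illustrative, not part of the commit):

```java
import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;

public class ReusableTokenLoop {
    public static void main(String[] args) throws IOException {
        TokenStream stream = new WhitespaceTokenizer(new StringReader("please divide this sentence"));
        final Token reusableToken = new Token();
        // next(Token) may return the reusable instance or a different one,
        // so always use the returned token, never the argument.
        for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) {
            System.out.println(nextToken.term());
        }
    }
}
```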
Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java Wed Aug 20 07:38:07 2008
@@ -66,52 +66,46 @@
   public void testFrontUnigram() throws Exception {
     EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input, EdgeNGramTokenizer.Side.FRONT, 1, 1);
-    Token token = null;
-    token = tokenizer.next();
-    assertEquals("(a,0,1)", token.toString());
-    token = tokenizer.next();
-    assertNull(token);
+    final Token reusableToken = new Token();
+    Token nextToken = tokenizer.next(reusableToken);
+    assertEquals("(a,0,1)", nextToken.toString());
+    assertNull(tokenizer.next(reusableToken));
   }

   public void testBackUnigram() throws Exception {
     EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input, EdgeNGramTokenizer.Side.BACK, 1, 1);
-    Token token = null;
-    token = tokenizer.next();
-    assertEquals("(e,4,5)", token.toString());
-    token = tokenizer.next();
-    assertNull(token);
+    final Token reusableToken = new Token();
+    Token nextToken = tokenizer.next(reusableToken);
+    assertEquals("(e,4,5)", nextToken.toString());
+    assertNull(tokenizer.next(reusableToken));
   }

   public void testOversizedNgrams() throws Exception {
     EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input, EdgeNGramTokenizer.Side.FRONT, 6, 6);
-    Token token = null;
-    token = tokenizer.next();
-    assertNull(token);
+    assertNull(tokenizer.next(new Token()));
   }

   public void testFrontRangeOfNgrams() throws Exception {
     EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input, EdgeNGramTokenizer.Side.FRONT, 1, 3);
-    Token token = null;
-    token = tokenizer.next();
-    assertEquals("(a,0,1)", token.toString());
-    token = tokenizer.next();
-    assertEquals("(ab,0,2)", token.toString());
-    token = tokenizer.next();
-    assertEquals("(abc,0,3)", token.toString());
-    token = tokenizer.next();
-    assertNull(token);
+    final Token reusableToken = new Token();
+    Token nextToken = tokenizer.next(reusableToken);
+    assertEquals("(a,0,1)", nextToken.toString());
+    nextToken = tokenizer.next(reusableToken);
+    assertEquals("(ab,0,2)", nextToken.toString());
+    nextToken = tokenizer.next(reusableToken);
+    assertEquals("(abc,0,3)", nextToken.toString());
+    assertNull(tokenizer.next(reusableToken));
   }

   public void testBackRangeOfNgrams() throws Exception {
     EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input, EdgeNGramTokenizer.Side.BACK, 1, 3);
-    Token token = null;
-    token = tokenizer.next();
-    assertEquals("(e,4,5)", token.toString());
-    token = tokenizer.next();
-    assertEquals("(de,3,5)", token.toString());
-    token = tokenizer.next();
-    assertEquals("(cde,2,5)", token.toString());
-    token = tokenizer.next();
-    assertNull(token);
+    final Token reusableToken = new Token();
+    Token nextToken = tokenizer.next(reusableToken);
+    assertEquals("(e,4,5)", nextToken.toString());
+    nextToken = tokenizer.next(reusableToken);
+    assertEquals("(de,3,5)", nextToken.toString());
+    nextToken = tokenizer.next(reusableToken);
+    assertEquals("(cde,2,5)", nextToken.toString());
+    assertNull(tokenizer.next(reusableToken));
   }
 }
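What the edge n-gram assertions above encode: the token's toString() form is (term,startOffset,endOffset), and Side.FRONT with min=1, max=3 yields the one-, two- and three-character prefixes. A sketch assuming the test fixture's input string is "abcde" (implied by the asserted offsets, not stated in this excerpt):

```java
import java.io.StringReader;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;

public class EdgeNGramDemo {
    public static void main(String[] args) throws Exception {
        EdgeNGramTokenizer tokenizer =
            new EdgeNGramTokenizer(new StringReader("abcde"), EdgeNGramTokenizer.Side.FRONT, 1, 3);
        final Token reusableToken = new Token();
        for (Token t = tokenizer.next(reusableToken); t != null; t = tokenizer.next(reusableToken)) {
            System.out.println(t); // expected: (a,0,1) then (ab,0,2) then (abc,0,3)
        }
    }
}
```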
Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java Wed Aug 20 07:38:07 2008
@@ -60,17 +60,14 @@
   public void testUnigrams() throws Exception {
     NGramTokenFilter filter = new NGramTokenFilter(input, 1, 1);
-
-    Token token = null;
-    do {
-      token = filter.next();
-      if (token != null) {
-        tokens.add(token.toString());
-//        System.out.println(token.termText());
-//        System.out.println(token);
-//        Thread.sleep(1000);
-      }
-    } while (token != null);
+
+    final Token reusableToken = new Token();
+    for (Token nextToken = filter.next(reusableToken); nextToken != null; nextToken = filter.next(reusableToken)) {
+      tokens.add(nextToken.toString());
+//      System.out.println(token.term());
+//      System.out.println(token);
+//      Thread.sleep(1000);
+    }

     assertEquals(5, tokens.size());
     ArrayList exp = new ArrayList();
@@ -80,17 +77,13 @@
   public void testBigrams() throws Exception {
     NGramTokenFilter filter = new NGramTokenFilter(input, 2, 2);
-
-    Token token = null;
-    do {
-      token = filter.next();
-      if (token != null) {
-        tokens.add(token.toString());
-//        System.out.println(token.termText());
-//        System.out.println(token);
-//        Thread.sleep(1000);
-      }
-    } while (token != null);
+    final Token reusableToken = new Token();
+    for (Token nextToken = filter.next(reusableToken); nextToken != null; nextToken = filter.next(reusableToken)) {
+      tokens.add(nextToken.toString());
+//      System.out.println(token.term());
+//      System.out.println(token);
+//      Thread.sleep(1000);
+    }

     assertEquals(4, tokens.size());
     ArrayList exp = new ArrayList();
@@ -100,17 +93,13 @@
   public void testNgrams() throws Exception {
     NGramTokenFilter filter = new NGramTokenFilter(input, 1, 3);
-
-    Token token = null;
-    do {
-      token = filter.next();
-      if (token != null) {
-        tokens.add(token.toString());
-//        System.out.println(token.termText());
-//        System.out.println(token);
-//        Thread.sleep(1000);
-      }
-    } while (token != null);
+    final Token reusableToken = new Token();
+    for (Token nextToken = filter.next(reusableToken); nextToken != null; nextToken = filter.next(reusableToken)) {
+      tokens.add(nextToken.toString());
+//      System.out.println(token.term());
+//      System.out.println(token);
+//      Thread.sleep(1000);
+    }

     assertEquals(12, tokens.size());
     ArrayList exp = new ArrayList();
@@ -122,17 +111,13 @@
   public void testOversizedNgrams() throws Exception {
     NGramTokenFilter filter = new NGramTokenFilter(input, 6, 7);
-
-    Token token = null;
-    do {
-      token = filter.next();
-      if (token != null) {
-        tokens.add(token.toString());
-//        System.out.println(token.termText());
-//        System.out.println(token);
-//        Thread.sleep(1000);
-      }
-    } while (token != null);
+    final Token reusableToken = new Token();
+    for (Token nextToken = filter.next(reusableToken); nextToken != null; nextToken = filter.next(reusableToken)) {
+      tokens.add(nextToken.toString());
+//      System.out.println(token.term());
+//      System.out.println(token);
+//      Thread.sleep(1000);
+    }

     assertTrue(tokens.isEmpty());
   }
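The do/while-to-for rewrite above repeats the same drain-a-stream-into-strings idiom in every test. Factored out, it would look like this (a hypothetical helper, not part of the commit):

```java
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;

public final class TokenStreams {
    private TokenStreams() {}

    /** Drains a stream into a list of Token.toString() forms, as the tests above do inline. */
    public static List<String> toStrings(TokenStream stream) throws IOException {
        List<String> result = new ArrayList<String>();
        final Token reusableToken = new Token();
        for (Token t = stream.next(reusableToken); t != null; t = stream.next(reusableToken)) {
            result.add(t.toString()); // copy now: the token is overwritten by the next call
        }
        return result;
    }
}
```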
Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java Wed Aug 20 07:38:07 2008
@@ -59,16 +59,13 @@
   public void testUnigrams() throws Exception {
     NGramTokenizer tokenizer = new NGramTokenizer(input, 1, 1);
-    Token token = null;
-    do {
-      token = tokenizer.next();
-      if (token != null) {
-        tokens.add(token.toString());
-//        System.out.println(token.termText());
-//        System.out.println(token);
-//        Thread.sleep(1000);
-      }
-    } while (token != null);
+    final Token reusableToken = new Token();
+    for (Token nextToken = tokenizer.next(reusableToken); nextToken != null; nextToken = tokenizer.next(reusableToken)) {
+      tokens.add(nextToken.toString());
+//      System.out.println(token.term());
+//      System.out.println(token);
+//      Thread.sleep(1000);
+    }

     assertEquals(5, tokens.size());
     ArrayList exp = new ArrayList();
@@ -78,17 +75,13 @@
   public void testBigrams() throws Exception {
     NGramTokenizer tokenizer = new NGramTokenizer(input, 2, 2);
-
-    Token token = null;
-    do {
-      token = tokenizer.next();
-      if (token != null) {
-        tokens.add(token.toString());
-//        System.out.println(token.termText());
-//        System.out.println(token);
-//        Thread.sleep(1000);
-      }
-    } while (token != null);
+    final Token reusableToken = new Token();
+    for (Token nextToken = tokenizer.next(reusableToken); nextToken != null; nextToken = tokenizer.next(reusableToken)) {
+      tokens.add(nextToken.toString());
+//      System.out.println(token.term());
+//      System.out.println(token);
+//      Thread.sleep(1000);
+    }

     assertEquals(4, tokens.size());
     ArrayList exp = new ArrayList();
@@ -98,17 +91,13 @@
   public void testNgrams() throws Exception {
     NGramTokenizer tokenizer = new NGramTokenizer(input, 1, 3);
-
-    Token token = null;
-    do {
-      token = tokenizer.next();
-      if (token != null) {
-        tokens.add(token.toString());
-//        System.out.println(token.termText());
-//        System.out.println(token);
-//        Thread.sleep(1000);
-      }
-    } while (token != null);
+    final Token reusableToken = new Token();
+    for (Token nextToken = tokenizer.next(reusableToken); nextToken != null; nextToken = tokenizer.next(reusableToken)) {
+      tokens.add(nextToken.toString());
+//      System.out.println(token.term());
+//      System.out.println(token);
+//      Thread.sleep(1000);
+    }

     assertEquals(12, tokens.size());
     ArrayList exp = new ArrayList();
@@ -120,17 +109,14 @@
   public void testOversizedNgrams() throws Exception {
     NGramTokenizer tokenizer = new NGramTokenizer(input, 6, 7);
-
-    Token token = null;
-    do {
-      token = tokenizer.next();
-      if (token != null) {
-        tokens.add(token.toString());
-//        System.out.println(token.termText());
-//        System.out.println(token);
-//        Thread.sleep(1000);
-      }
-    } while (token != null);
+
+    final Token reusableToken = new Token();
+    for (Token nextToken = tokenizer.next(reusableToken); nextToken != null; nextToken = tokenizer.next(reusableToken)) {
+      tokens.add(nextToken.toString());
+//      System.out.println(token.term());
+//      System.out.println(token);
+//      Thread.sleep(1000);
+    }

     assertTrue(tokens.isEmpty());
   }
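One consequence of the reuse contract that the rewritten tests respect: the returned token's contents are overwritten by the following next(Token) call, so anything kept across iterations must be copied first. A sketch of the safe pattern (Token.clone() is assumed available here, as in contemporary Lucene versions):

```java
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.ngram.NGramTokenizer;

public class ReuseContractDemo {
    public static void main(String[] args) throws Exception {
        NGramTokenizer tokenizer = new NGramTokenizer(new StringReader("abcde"), 1, 1);
        final Token reusableToken = new Token();
        List<Token> saved = new ArrayList<Token>();
        for (Token t = tokenizer.next(reusableToken); t != null; t = tokenizer.next(reusableToken)) {
            // Storing t itself would alias one mutable object across all entries;
            // keep an independent copy instead.
            saved.add((Token) t.clone());
        }
        System.out.println(saved); // five distinct unigram tokens
    }
}
```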
Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java Wed Aug 20 07:38:07 2008
@@ -43,20 +43,20 @@
     String test = "The quick red fox jumped over the lazy brown dogs";
     NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test))), 3, "D");
-    Token tok = new Token();
     boolean seenDogs = false;
-    while ((tok = nptf.next(tok)) != null){
-      if (tok.termText().equals("dogs")){
+    final Token reusableToken = new Token();
+    for (Token nextToken = nptf.next(reusableToken); nextToken != null; nextToken = nptf.next(reusableToken)) {
+      if (nextToken.term().equals("dogs")){
         seenDogs = true;
-        assertTrue(tok.type() + " is not equal to " + "D", tok.type().equals("D") == true);
-        assertTrue("tok.getPayload() is null and it shouldn't be", tok.getPayload() != null);
-        byte [] bytes = tok.getPayload().getData();//safe here to just use the bytes, otherwise we should use offset, length
-        assertTrue(bytes.length + " does not equal: " + tok.getPayload().length(), bytes.length == tok.getPayload().length());
-        assertTrue(tok.getPayload().getOffset() + " does not equal: " + 0, tok.getPayload().getOffset() == 0);
+        assertTrue(nextToken.type() + " is not equal to " + "D", nextToken.type().equals("D") == true);
+        assertTrue("nextToken.getPayload() is null and it shouldn't be", nextToken.getPayload() != null);
+        byte [] bytes = nextToken.getPayload().getData();//safe here to just use the bytes, otherwise we should use offset, length
+        assertTrue(bytes.length + " does not equal: " + nextToken.getPayload().length(), bytes.length == nextToken.getPayload().length());
+        assertTrue(nextToken.getPayload().getOffset() + " does not equal: " + 0, nextToken.getPayload().getOffset() == 0);
         float pay = PayloadHelper.decodeFloat(bytes);
         assertTrue(pay + " does not equal: " + 3, pay == 3);
       } else {
-        assertTrue(tok.type() + " is not null and it should be", tok.type().equals("word"));
+        assertTrue(nextToken.type() + " is not null and it should be", nextToken.type().equals("word"));
       }
     }
     assertTrue(seenDogs + " does not equal: " + true, seenDogs == true);
@@ -67,12 +67,13 @@
       super(input);
     }

-    public Token next(Token result) throws IOException {
-      result = input.next(result);
-      if (result != null && result.termText().equals("dogs")) {
-        result.setType("D");
+    public Token next(final Token reusableToken) throws IOException {
+      assert reusableToken != null;
+      Token nextToken = input.next(reusableToken);
+      if (nextToken != null && nextToken.term().equals("dogs")) {
+        nextToken.setType("D");
       }
-      return result;
+      return nextToken;
     }
   }

Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java Wed Aug 20 07:38:07 2008
@@ -42,17 +42,17 @@
     String test = "The quick red fox jumped over the lazy brown dogs";
     TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
-    Token tok = new Token();
     int count = 0;
-    while ((tok = nptf.next(tok)) != null){
-      assertTrue("tok is null and it shouldn't be", tok != null);
-      Payload pay = tok.getPayload();
+    final Token reusableToken = new Token();
+    for (Token nextToken = nptf.next(reusableToken); nextToken != null; nextToken = nptf.next(reusableToken)) {
+      assertTrue("nextToken is null and it shouldn't be", nextToken != null);
+      Payload pay = nextToken.getPayload();
       assertTrue("pay is null and it shouldn't be", pay != null);
       byte [] data = pay.getData();
       int start = PayloadHelper.decodeInt(data, 0);
-      assertTrue(start + " does not equal: " + tok.startOffset(), start == tok.startOffset());
+      assertTrue(start + " does not equal: " + nextToken.startOffset(), start == nextToken.startOffset());
       int end = PayloadHelper.decodeInt(data, 4);
-      assertTrue(end + " does not equal: " + tok.endOffset(), end == tok.endOffset());
+      assertTrue(end + " does not equal: " + nextToken.endOffset(), end == nextToken.endOffset());
       count++;
     }
     assertTrue(count + " does not equal: " + 10, count == 10);
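Both payload tests decode with PayloadHelper; the round trip looks like this (a sketch assuming the matching encodeFloat/encodeInt counterparts in org.apache.lucene.analysis.payloads.PayloadHelper, which the decode calls above imply):

```java
import org.apache.lucene.analysis.payloads.PayloadHelper;

public class PayloadRoundTrip {
    public static void main(String[] args) {
        // Float payload, as NumericPayloadTokenFilter stores (the "3" asserted above).
        byte[] floatBytes = PayloadHelper.encodeFloat(3.0f);
        System.out.println(PayloadHelper.decodeFloat(floatBytes)); // 3.0

        // Int payload, as TokenOffsetPayloadTokenFilter stores for start/end offsets.
        byte[] intBytes = PayloadHelper.encodeInt(42);
        System.out.println(PayloadHelper.decodeInt(intBytes, 0)); // 42
    }
}
```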
Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java Wed Aug 20 07:38:07 2008
@@ -44,14 +44,14 @@
     String test = "The quick red fox jumped over the lazy brown dogs";
     TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test))));
-    Token tok = new Token();
     int count = 0;
-    while ((tok = nptf.next(tok)) != null){
-      assertTrue(tok.type() + " is not null and it should be", tok.type().equals(String.valueOf(Character.toUpperCase(tok.termBuffer()[0]))));
-      assertTrue("tok.getPayload() is null and it shouldn't be", tok.getPayload() != null);
-      String type = new String(tok.getPayload().getData(), "UTF-8");
+    final Token reusableToken = new Token();
+    for (Token nextToken = nptf.next(reusableToken); nextToken != null; nextToken = nptf.next(reusableToken)) {
+      assertTrue(nextToken.type() + " is not null and it should be", nextToken.type().equals(String.valueOf(Character.toUpperCase(nextToken.termBuffer()[0]))));
+      assertTrue("nextToken.getPayload() is null and it shouldn't be", nextToken.getPayload() != null);
+      String type = new String(nextToken.getPayload().getData(), "UTF-8");
       assertTrue("type is null and it shouldn't be", type != null);
-      assertTrue(type + " is not equal to " + tok.type(), type.equals(tok.type()) == true);
+      assertTrue(type + " is not equal to " + nextToken.type(), type.equals(nextToken.type()) == true);
       count++;
     }
     assertTrue(count + " does not equal: " + 10, count == 10);
@@ -64,12 +64,13 @@


-    public Token next(Token result) throws IOException {
-      result = input.next(result);
-      if (result != null) {
-        result.setType(String.valueOf(Character.toUpperCase(result.termBuffer()[0])));
+    public Token next(final Token reusableToken) throws IOException {
+      assert reusableToken != null;
+      Token nextToken = input.next(reusableToken);
+      if (nextToken != null) {
+        nextToken.setType(String.valueOf(Character.toUpperCase(nextToken.termBuffer()[0])));
       }
-      return result;
+      return nextToken;
     }
   }
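The producer side of the contract, as the two WordTokenFilter hunks above show it: assert the argument is non-null, delegate to input.next(reusableToken), then mutate and return whatever token comes back, which need not be the argument. A minimal filter in the same shape (the class name is hypothetical):

```java
import java.io.IOException;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;

/** Illustrative filter following the reusable-token producer pattern used in this commit. */
public class UpperCaseTypeFilter extends TokenFilter {
    public UpperCaseTypeFilter(TokenStream input) {
        super(input);
    }

    public Token next(final Token reusableToken) throws IOException {
        assert reusableToken != null;
        // Delegate first; the upstream stream may return reusableToken or its own instance.
        Token nextToken = input.next(reusableToken);
        if (nextToken != null) {
            nextToken.setType(nextToken.type().toUpperCase());
        }
        return nextToken;
    }
}
```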
Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java Wed Aug 20 07:38:07 2008
@@ -17,12 +17,17 @@
  * limitations under the License.
  */

-import junit.framework.TestCase;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.StringReader;

-import java.io.*;
+import junit.framework.TestCase;

-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;

 /**
  * Test case for RussianAnalyzer.
@@ -72,22 +77,26 @@
                 sampleUnicode,
                 RussianCharsets.UnicodeRussian);

+        final Token reusableToken = new Token();
+        final Token reusableSampleToken = new Token();
+        Token nextToken;
+        Token nextSampleToken;
         for (;;)
         {
-            Token token = in.next();
+            nextToken = in.next(reusableToken);

-            if (token == null)
+            if (nextToken == null)
             {
                 break;
             }

-            Token sampleToken = sample.next();
+            nextSampleToken = sample.next(reusableSampleToken);
             assertEquals(
                 "Unicode",
-                token.termText(),
-                sampleToken == null
+                nextToken.term(),
+                nextSampleToken == null
                     ? null
-                    : sampleToken.termText());
+                    : nextSampleToken.term());
         }

         inWords.close();
@@ -109,22 +118,26 @@
                 sampleKOI8,
                 RussianCharsets.KOI8);

+        final Token reusableToken = new Token();
+        final Token reusableSampleToken = new Token();
+        Token nextToken;
+        Token nextSampleToken;
         for (;;)
         {
-            Token token = in.next();
+            nextToken = in.next(reusableToken);

-            if (token == null)
+            if (nextToken == null)
             {
                 break;
             }

-            Token sampleToken = sample.next();
+            nextSampleToken = sample.next(reusableSampleToken);
             assertEquals(
                 "KOI8",
-                token.termText(),
-                sampleToken == null
+                nextToken.term(),
+                nextSampleToken == null
                     ? null
-                    : sampleToken.termText());
+                    : nextSampleToken.term());
         }

@@ -146,22 +159,26 @@
                 sample1251,
                 RussianCharsets.CP1251);

+        final Token reusableToken = new Token();
+        final Token reusableSampleToken = new Token();
+        Token nextToken;
+        Token nextSampleToken;
         for (;;)
         {
-            Token token = in.next();
+            nextToken = in.next(reusableToken);

-            if (token == null)
+            if (nextToken == null)
             {
                 break;
             }

-            Token sampleToken = sample.next();
+            nextSampleToken = sample.next(reusableSampleToken);
             assertEquals(
                 "1251",
-                token.termText(),
-                sampleToken == null
+                nextToken.term(),
+                nextSampleToken == null
                     ? null
-                    : sampleToken.termText());
+                    : nextSampleToken.term());
         }

@@ -175,9 +192,10 @@
         RussianAnalyzer ra = new RussianAnalyzer();
         TokenStream stream = ra.tokenStream("", reader);

+        final Token reusableToken = new Token();
         try {
-            assertEquals("text", stream.next().termText());
-            assertNotNull("RussianAnalyzer's tokenizer skips numbers from input text", stream.next());
+            assertEquals("text", stream.next(reusableToken).term());
+            assertNotNull("RussianAnalyzer's tokenizer skips numbers from input text", stream.next(reusableToken));
         }
         catch (IOException e)
         {
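The Russian tests need one reusable token per stream: a single token cannot be shared by two streams that are advanced in lockstep, which is why the hunks above introduce both reusableToken and reusableSampleToken. The comparison loop they use, factored into a hypothetical helper:

```java
import java.io.IOException;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;

public final class StreamCompare {
    private StreamCompare() {}

    /** True when both streams yield the same term sequence and end together. */
    public static boolean sameTerms(TokenStream a, TokenStream b) throws IOException {
        final Token reusableA = new Token(); // one reusable token per stream
        final Token reusableB = new Token();
        for (;;) {
            Token nextA = a.next(reusableA);
            Token nextB = b.next(reusableB);
            if (nextA == null || nextB == null) {
                return nextA == nextB; // both exhausted at the same time
            }
            if (!nextA.term().equals(nextB.term())) {
                return false;
            }
        }
    }
}
```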
Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java Wed Aug 20 07:38:07 2008
@@ -156,11 +156,11 @@
     TokenStream ts = analyzer.tokenStream("content", new StringReader("this sentence"));
-    Token token;
     int j = -1;
-    while ((token = ts.next()) != null) {
-      j += token.getPositionIncrement();
-      String termText = new String(token.termBuffer(), 0, token.termLength());
+    final Token reusableToken = new Token();
+    for (Token nextToken = ts.next(reusableToken); nextToken != null; nextToken = ts.next(reusableToken)) {
+      j += nextToken.getPositionIncrement();
+      String termText = nextToken.term();
       q.add(new Term("content", termText), j);
     }

@@ -182,9 +182,9 @@
     TokenStream ts = analyzer.tokenStream("content", new StringReader("test sentence"));
-    Token token;
-    while ((token = ts.next()) != null) {
-      String termText = new String(token.termBuffer(), 0, token.termLength());
+    final Token reusableToken = new Token();
+    for (Token nextToken = ts.next(reusableToken); nextToken != null; nextToken = ts.next(reusableToken)) {
+      String termText = nextToken.term();
       q.add(new TermQuery(new Term("content", termText)), BooleanClause.Occur.SHOULD);
     }
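The first hunk above builds a position-aware PhraseQuery by accumulating getPositionIncrement(). Extracted, the same logic reads as follows (a sketch; the helper and class names are invented, the API calls are the ones the test itself uses):

```java
import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.PhraseQuery;

public final class PhraseFromStream {
    private PhraseFromStream() {}

    /** Builds a PhraseQuery whose term positions follow the stream's position increments. */
    public static PhraseQuery build(Analyzer analyzer, String field, String text) throws IOException {
        PhraseQuery q = new PhraseQuery();
        TokenStream ts = analyzer.tokenStream(field, new StringReader(text));
        int position = -1; // first increment (usually 1) moves this to 0
        final Token reusableToken = new Token();
        for (Token t = ts.next(reusableToken); t != null; t = ts.next(reusableToken)) {
            position += t.getPositionIncrement();
            q.add(new Term(field, t.term()), position);
        }
        return q;
    }
}
```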
Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java Wed Aug 20 07:38:07 2008
@@ -35,7 +35,8 @@
       this.testToken = testToken;
     }

-    public Token next() throws IOException {
+    public Token next(final Token reusableToken) throws IOException {
+      assert reusableToken != null;
       if (index < testToken.length) {
         return testToken[index++];
       } else {
@@ -49,28 +50,28 @@
   }

   public static final Token[] TEST_TOKEN = new Token[] {
-    new Token("please", 0, 6),
-    new Token("divide", 7, 13),
-    new Token("this", 14, 18),
-    new Token("sentence", 19, 27),
-    new Token("into", 28, 32),
-    new Token("shingles", 33, 39),
+    createToken("please", 0, 6),
+    createToken("divide", 7, 13),
+    createToken("this", 14, 18),
+    createToken("sentence", 19, 27),
+    createToken("into", 28, 32),
+    createToken("shingles", 33, 39),
   };

   public static Token[] testTokenWithHoles;

   public static final Token[] BI_GRAM_TOKENS = new Token[] {
-    new Token("please", 0, 6),
-    new Token("please divide", 0, 13),
-    new Token("divide", 7, 13),
-    new Token("divide this", 7, 18),
-    new Token("this", 14, 18),
-    new Token("this sentence", 14, 27),
-    new Token("sentence", 19, 27),
-    new Token("sentence into", 19, 32),
-    new Token("into", 28, 32),
-    new Token("into shingles", 28, 39),
-    new Token("shingles", 33, 39),
+    createToken("please", 0, 6),
+    createToken("please divide", 0, 13),
+    createToken("divide", 7, 13),
+    createToken("divide this", 7, 18),
+    createToken("this", 14, 18),
+    createToken("this sentence", 14, 27),
+    createToken("sentence", 19, 27),
+    createToken("sentence into", 19, 32),
+    createToken("into", 28, 32),
+    createToken("into shingles", 28, 39),
+    createToken("shingles", 33, 39),
   };

   public static final int[] BI_GRAM_POSITION_INCREMENTS = new int[] {
@@ -83,17 +84,17 @@
   };

   public static final Token[] BI_GRAM_TOKENS_WITH_HOLES = new Token[] {
-    new Token("please", 0, 6),
-    new Token("please divide", 0, 13),
-    new Token("divide", 7, 13),
-    new Token("divide _", 7, 19),
-    new Token("_", 19, 19),
-    new Token("_ sentence", 19, 27),
-    new Token("sentence", 19, 27),
-    new Token("sentence _", 19, 33),
-    new Token("_", 33, 33),
-    new Token("_ shingles", 33, 39),
-    new Token("shingles", 33, 39),
+    createToken("please", 0, 6),
+    createToken("please divide", 0, 13),
+    createToken("divide", 7, 13),
+    createToken("divide _", 7, 19),
+    createToken("_", 19, 19),
+    createToken("_ sentence", 19, 27),
+    createToken("sentence", 19, 27),
+    createToken("sentence _", 19, 33),
+    createToken("_", 33, 33),
+    createToken("_ shingles", 33, 39),
+    createToken("shingles", 33, 39),
   };

   public static final int[] BI_GRAM_POSITION_INCREMENTS_WITH_HOLES = new int[] {
@@ -101,21 +102,21 @@
   };

   public static final Token[] TRI_GRAM_TOKENS = new Token[] {
-    new Token("please", 0, 6),
-    new Token("please divide", 0, 13),
-    new Token("please divide this", 0, 18),
-    new Token("divide", 7, 13),
-    new Token("divide this", 7, 18),
-    new Token("divide this sentence", 7, 27),
-    new Token("this", 14, 18),
-    new Token("this sentence", 14, 27),
-    new Token("this sentence into", 14, 32),
-    new Token("sentence", 19, 27),
-    new Token("sentence into", 19, 32),
-    new Token("sentence into shingles", 19, 39),
-    new Token("into", 28, 32),
-    new Token("into shingles", 28, 39),
-    new Token("shingles", 33, 39)
+    createToken("please", 0, 6),
+    createToken("please divide", 0, 13),
+    createToken("please divide this", 0, 18),
+    createToken("divide", 7, 13),
+    createToken("divide this", 7, 18),
+    createToken("divide this sentence", 7, 27),
+    createToken("this", 14, 18),
+    createToken("this sentence", 14, 27),
+    createToken("this sentence into", 14, 32),
+    createToken("sentence", 19, 27),
+    createToken("sentence into", 19, 32),
+    createToken("sentence into shingles", 19, 39),
+    createToken("into", 28, 32),
+    createToken("into shingles", 28, 39),
+    createToken("shingles", 33, 39)
   };

   public static final int[] TRI_GRAM_POSITION_INCREMENTS = new int[] {
@@ -135,10 +136,10 @@
   protected void setUp() throws Exception {
     super.setUp();
     testTokenWithHoles = new Token[] {
-      new Token("please", 0, 6),
-      new Token("divide", 7, 13),
-      new Token("sentence", 19, 27),
-      new Token("shingles", 33, 39),
+      createToken("please", 0, 6),
+      createToken("divide", 7, 13),
+      createToken("sentence", 19, 27),
+      createToken("shingles", 33, 39),
     };

     testTokenWithHoles[2].setPositionIncrement(2);
@@ -168,22 +169,27 @@
     throws IOException {
     TokenStream filter = new ShingleFilter(new TestTokenStream(tokensToShingle), maxSize);
-    Token token;
     int i = 0;
-
-    while ((token = filter.next()) != null) {
-      String termText = new String(token.termBuffer(), 0, token.termLength());
-      String goldText
-        = new String(tokensToCompare[i].termBuffer(), 0, tokensToCompare[i].termLength());
+    final Token reusableToken = new Token();
+    for (Token nextToken = filter.next(reusableToken); nextToken != null; nextToken = filter.next(reusableToken)) {
+      String termText = nextToken.term();
+      String goldText = tokensToCompare[i].term();
       assertEquals("Wrong termText", goldText, termText);
       assertEquals("Wrong startOffset for token \"" + termText + "\"",
-                   tokensToCompare[i].startOffset(), token.startOffset());
+                   tokensToCompare[i].startOffset(), nextToken.startOffset());
       assertEquals("Wrong endOffset for token \"" + termText + "\"",
-                   tokensToCompare[i].endOffset(), token.endOffset());
+                   tokensToCompare[i].endOffset(), nextToken.endOffset());
       assertEquals("Wrong positionIncrement for token \"" + termText + "\"",
-                   positionIncrements[i], token.getPositionIncrement());
-      assertEquals("Wrong type for token \"" + termText + "\"", types[i], token.type());
+                   positionIncrements[i], nextToken.getPositionIncrement());
+      assertEquals("Wrong type for token \"" + termText + "\"", types[i], nextToken.type());
       i++;
     }
   }
+
+  private static Token createToken(String term, int start, int offset)
+  {
+    Token token = new Token(start, offset);
+    token.setTermBuffer(term);
+    return token;
+  }
 }
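The new createToken helper exists because this commit moves off the Token(String, int, int) constructor: offsets go through the constructor, text through the term buffer. Standalone usage, mirroring the helper added above:

```java
import org.apache.lucene.analysis.Token;

public final class Tokens {
    private Tokens() {}

    /** Same shape as the createToken helper in the diff above. */
    public static Token createToken(String term, int startOffset, int endOffset) {
        Token token = new Token(startOffset, endOffset); // offsets via the constructor
        token.setTermBuffer(term);                       // text via the term buffer
        return token;
    }

    public static void main(String[] args) {
        System.out.println(createToken("please", 0, 6)); // prints (please,0,6)
    }
}
```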
Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java?rev=687357&r1=687356&r2=687357&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java Wed Aug 20 07:38:07 2008
@@ -40,29 +40,23 @@
     ShingleMatrixFilter.defaultSettingsCodec = null;

-    Token token = new Token(); // for debug use only
-
-
-
     TokenStream ts;

     ts = new ShingleMatrixFilter(new EmptyTokenStream(), 1, 2, ' ', false, new ShingleMatrixFilter.OneDimensionalNonWeightedTokenSettingsCodec());
-    assertNull(ts.next());
+    assertNull(ts.next(new Token()));

     TokenListStream tls;
     LinkedList tokens;

-    // test a plain old token stream with synonyms tranlated to rows.
+    // test a plain old token stream with synonyms translated to rows.
     tokens = new LinkedList();
-    tokens.add(new Token("please", 0, 6));
-    tokens.add(new Token("divide", 7, 13));
-    tokens.add(new Token("this", 14, 18));
-    tokens.add(new Token("sentence", 19, 27));
-    tokens.add(new Token("into", 28, 32));
-    tokens.add(new Token("shingles", 33, 39));
+    tokens.add(createToken("please", 0, 6));
+    tokens.add(createToken("divide", 7, 13));
+    tokens.add(createToken("this", 14, 18));
+    tokens.add(createToken("sentence", 19, 27));
+    tokens.add(createToken("into", 28, 32));
+    tokens.add(createToken("shingles", 33, 39));

     tls = new TokenListStream(tokens);

@@ -70,20 +64,22 @@
     ts = new ShingleMatrixFilter(tls, 1, 2, ' ', false, new ShingleMatrixFilter.OneDimensionalNonWeightedTokenSettingsCodec());

-    assertNext(ts, "please", 0, 6);
-    assertNext(ts, "please divide", 0, 13);
-    assertNext(ts, "divide", 7, 13);
-    assertNext(ts, "divide this", 7, 18);
-    assertNext(ts, "this", 14, 18);
-    assertNext(ts, "this sentence", 14, 27);
-    assertNext(ts, "sentence", 19, 27);
-    assertNext(ts, "sentence into", 19, 32);
-    assertNext(ts, "into", 28, 32);
-    assertNext(ts, "into shingles", 28, 39);
-    assertNext(ts, "shingles", 33, 39);
+    Token reusableToken = new Token();
+
+    assertNext(ts, reusableToken, "please", 0, 6);
+    assertNext(ts, reusableToken, "please divide", 0, 13);
+    assertNext(ts, reusableToken, "divide", 7, 13);
+    assertNext(ts, reusableToken, "divide this", 7, 18);
+    assertNext(ts, reusableToken, "this", 14, 18);
+    assertNext(ts, reusableToken, "this sentence", 14, 27);
+    assertNext(ts, reusableToken, "sentence", 19, 27);
+    assertNext(ts, reusableToken, "sentence into", 19, 32);
+    assertNext(ts, reusableToken, "into", 28, 32);
+    assertNext(ts, reusableToken, "into shingles", 28, 39);
+    assertNext(ts, reusableToken, "shingles", 33, 39);

-    assertNull(ts.next());
+    assertNull(ts.next(reusableToken));
   }

@@ -95,9 +91,6 @@
     ShingleMatrixFilter.defaultSettingsCodec = null;//new ShingleMatrixFilter.SimpleThreeDimensionalTokenSettingsCodec();

-    Token token = new Token(); // for debug use only
-
-
     TokenStream ts;
     TokenListStream tls;
     LinkedList tokens;
@@ -117,25 +110,26 @@
     ts = new ShingleMatrixFilter(tls, 2, 2, '_', false, new ShingleMatrixFilter.TwoDimensionalNonWeightedSynonymTokenSettingsCodec());

-    assertNext(ts, "hello_world");
-    assertNext(ts, "greetings_world");
-    assertNext(ts, "hello_earth");
-    assertNext(ts, "greetings_earth");
-    assertNext(ts, "hello_tellus");
-    assertNext(ts, "greetings_tellus");
-    assertNull(ts.next());
+    final Token reusableToken = new Token();
+    assertNext(ts, reusableToken, "hello_world");
+    assertNext(ts, reusableToken, "greetings_world");
+    assertNext(ts, reusableToken, "hello_earth");
+    assertNext(ts, reusableToken, "greetings_earth");
+    assertNext(ts, reusableToken, "hello_tellus");
+    assertNext(ts, reusableToken, "greetings_tellus");
+    assertNull(ts.next(reusableToken));

     // bi-grams with no spacer character, start offset, end offset
     tls.reset();
     ts = new ShingleMatrixFilter(tls, 2, 2, null, false, new ShingleMatrixFilter.TwoDimensionalNonWeightedSynonymTokenSettingsCodec());

-    assertNext(ts, "helloworld", 0, 10);
-    assertNext(ts, "greetingsworld", 0, 10);
-    assertNext(ts, "helloearth", 0, 10);
-    assertNext(ts, "greetingsearth", 0, 10);
-    assertNext(ts, "hellotellus", 0, 10);
-    assertNext(ts, "greetingstellus", 0, 10);
-    assertNull(ts.next());
+    assertNext(ts, reusableToken, "helloworld", 0, 10);
+    assertNext(ts, reusableToken, "greetingsworld", 0, 10);
+    assertNext(ts, reusableToken, "helloearth", 0, 10);
+    assertNext(ts, reusableToken, "greetingsearth", 0, 10);
+    assertNext(ts, reusableToken, "hellotellus", 0, 10);
+    assertNext(ts, reusableToken, "greetingstellus", 0, 10);
+    assertNull(ts.next(reusableToken));

     // add ^_prefix_and_suffix_$

@@ -160,119 +154,119 @@
     ts = new ShingleMatrixFilter(tls, 2, 2, '_', false);
 //
-//    while ((token = ts.next(token)) != null) {
-//      System.out.println("assertNext(ts, \"" + token.termText() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
+//    for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
+//      System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
 //      token.clear();
 //    }

-    assertNext(ts, "^_hello", 1, 10.049875f, 0, 4);
-    assertNext(ts, "^_greetings", 1, 10.049875f, 0, 4);
-    assertNext(ts, "hello_world", 1, 1.4142135f, 0, 10);
-    assertNext(ts, "greetings_world", 1, 1.4142135f, 0, 10);
-    assertNext(ts, "hello_earth", 1, 1.4142135f, 0, 10);
-    assertNext(ts, "greetings_earth", 1, 1.4142135f, 0, 10);
-    assertNext(ts, "hello_tellus", 1, 1.4142135f, 0, 10);
-    assertNext(ts, "greetings_tellus", 1, 1.4142135f, 0, 10);
-    assertNext(ts, "world_$", 1, 7.1414285f, 5, 10);
-    assertNext(ts, "earth_$", 1, 7.1414285f, 5, 10);
-    assertNext(ts, "tellus_$", 1, 7.1414285f, 5, 10);
-    assertNull(ts.next());
+    assertNext(ts, reusableToken, "^_hello", 1, 10.049875f, 0, 4);
+    assertNext(ts, reusableToken, "^_greetings", 1, 10.049875f, 0, 4);
+    assertNext(ts, reusableToken, "hello_world", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "greetings_world", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "hello_earth", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "greetings_earth", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "hello_tellus", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "greetings_tellus", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "world_$", 1, 7.1414285f, 5, 10);
+    assertNext(ts, reusableToken, "earth_$", 1, 7.1414285f, 5, 10);
+    assertNext(ts, reusableToken, "tellus_$", 1, 7.1414285f, 5, 10);
+    assertNull(ts.next(reusableToken));

     // test unlimited size and allow single boundary token as shingle
     tls.reset();
     ts = new ShingleMatrixFilter(tls, 1, Integer.MAX_VALUE, '_', false);
 //
-//    while ((token = ts.next(token)) != null) {
-//      System.out.println("assertNext(ts, \"" + token.termText() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
+//    for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
+//      System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
 //      token.clear();
 //    }

-    assertNext(ts, "^", 1, 10.0f, 0, 0);
-    assertNext(ts, "^_hello", 1, 10.049875f, 0, 4);
-    assertNext(ts, "^_hello_world", 1, 10.099504f, 0, 10);
-    assertNext(ts, "^_hello_world_$", 1, 12.328828f, 0, 10);
-    assertNext(ts, "hello", 1, 1.0f, 0, 4);
-    assertNext(ts, "hello_world", 1, 1.4142135f, 0, 10);
-    assertNext(ts, "hello_world_$", 1, 7.2111025f, 0, 10);
-    assertNext(ts, "world", 1, 1.0f, 5, 10);
-    assertNext(ts, "world_$", 1, 7.1414285f, 5, 10);
-    assertNext(ts, "$", 1, 7.071068f, 10, 10);
-    assertNext(ts, "^_greetings", 1, 10.049875f, 0, 4);
-    assertNext(ts, "^_greetings_world", 1, 10.099504f, 0, 10);
-    assertNext(ts, "^_greetings_world_$", 1, 12.328828f, 0, 10);
-    assertNext(ts, "greetings", 1, 1.0f, 0, 4);
-    assertNext(ts, "greetings_world", 1, 1.4142135f, 0, 10);
-    assertNext(ts, "greetings_world_$", 1, 7.2111025f, 0, 10);
-    assertNext(ts, "^_hello_earth", 1, 10.099504f, 0, 10);
-    assertNext(ts, "^_hello_earth_$", 1, 12.328828f, 0, 10);
-    assertNext(ts, "hello_earth", 1, 1.4142135f, 0, 10);
-    assertNext(ts, "hello_earth_$", 1, 7.2111025f, 0, 10);
-    assertNext(ts, "earth", 1, 1.0f, 5, 10);
-    assertNext(ts, "earth_$", 1, 7.1414285f, 5, 10);
-    assertNext(ts, "^_greetings_earth", 1, 10.099504f, 0, 10);
-    assertNext(ts, "^_greetings_earth_$", 1, 12.328828f, 0, 10);
-    assertNext(ts, "greetings_earth", 1, 1.4142135f, 0, 10);
-    assertNext(ts, "greetings_earth_$", 1, 7.2111025f, 0, 10);
-    assertNext(ts, "^_hello_tellus", 1, 10.099504f, 0, 10);
-    assertNext(ts, "^_hello_tellus_$", 1, 12.328828f, 0, 10);
-    assertNext(ts, "hello_tellus", 1, 1.4142135f, 0, 10);
-    assertNext(ts, "hello_tellus_$", 1, 7.2111025f, 0, 10);
-    assertNext(ts, "tellus", 1, 1.0f, 5, 10);
-    assertNext(ts, "tellus_$", 1, 7.1414285f, 5, 10);
-    assertNext(ts, "^_greetings_tellus", 1, 10.099504f, 0, 10);
-    assertNext(ts, "^_greetings_tellus_$", 1, 12.328828f, 0, 10);
-    assertNext(ts, "greetings_tellus", 1, 1.4142135f, 0, 10);
-    assertNext(ts, "greetings_tellus_$", 1, 7.2111025f, 0, 10);
+    assertNext(ts, reusableToken, "^", 1, 10.0f, 0, 0);
+    assertNext(ts, reusableToken, "^_hello", 1, 10.049875f, 0, 4);
+    assertNext(ts, reusableToken, "^_hello_world", 1, 10.099504f, 0, 10);
+    assertNext(ts, reusableToken, "^_hello_world_$", 1, 12.328828f, 0, 10);
+    assertNext(ts, reusableToken, "hello", 1, 1.0f, 0, 4);
+    assertNext(ts, reusableToken, "hello_world", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "hello_world_$", 1, 7.2111025f, 0, 10);
+    assertNext(ts, reusableToken, "world", 1, 1.0f, 5, 10);
+    assertNext(ts, reusableToken, "world_$", 1, 7.1414285f, 5, 10);
+    assertNext(ts, reusableToken, "$", 1, 7.071068f, 10, 10);
+    assertNext(ts, reusableToken, "^_greetings", 1, 10.049875f, 0, 4);
+    assertNext(ts, reusableToken, "^_greetings_world", 1, 10.099504f, 0, 10);
+    assertNext(ts, reusableToken, "^_greetings_world_$", 1, 12.328828f, 0, 10);
+    assertNext(ts, reusableToken, "greetings", 1, 1.0f, 0, 4);
+    assertNext(ts, reusableToken, "greetings_world", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "greetings_world_$", 1, 7.2111025f, 0, 10);
+    assertNext(ts, reusableToken, "^_hello_earth", 1, 10.099504f, 0, 10);
+    assertNext(ts, reusableToken, "^_hello_earth_$", 1, 12.328828f, 0, 10);
+    assertNext(ts, reusableToken, "hello_earth", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "hello_earth_$", 1, 7.2111025f, 0, 10);
+    assertNext(ts, reusableToken, "earth", 1, 1.0f, 5, 10);
+    assertNext(ts, reusableToken, "earth_$", 1, 7.1414285f, 5, 10);
+    assertNext(ts, reusableToken, "^_greetings_earth", 1, 10.099504f, 0, 10);
+    assertNext(ts, reusableToken, "^_greetings_earth_$", 1, 12.328828f, 0, 10);
+    assertNext(ts, reusableToken, "greetings_earth", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "greetings_earth_$", 1, 7.2111025f, 0, 10);
+    assertNext(ts, reusableToken, "^_hello_tellus", 1, 10.099504f, 0, 10);
+    assertNext(ts, reusableToken, "^_hello_tellus_$", 1, 12.328828f, 0, 10);
+    assertNext(ts, reusableToken, "hello_tellus", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "hello_tellus_$", 1, 7.2111025f, 0, 10);
+    assertNext(ts, reusableToken, "tellus", 1, 1.0f, 5, 10);
+    assertNext(ts, reusableToken, "tellus_$", 1, 7.1414285f, 5, 10);
+    assertNext(ts, reusableToken, "^_greetings_tellus", 1, 10.099504f, 0, 10);
+    assertNext(ts, reusableToken, "^_greetings_tellus_$", 1, 12.328828f, 0, 10);
+    assertNext(ts, reusableToken, "greetings_tellus", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "greetings_tellus_$", 1, 7.2111025f, 0, 10);

-    assertNull(ts.next());
+    assertNull(ts.next(reusableToken));

     // test unlimited size but don't allow single boundary token as shingle
     tls.reset();
     ts = new ShingleMatrixFilter(tls, 1, Integer.MAX_VALUE, '_', true);

-//    while ((token = ts.next(token)) != null) {
-//      System.out.println("assertNext(ts, \"" + token.termText() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
+//    for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
+//      System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
 //      token.clear();
 //    }

-    assertNext(ts, "^_hello", 1, 10.049875f, 0, 4);
-    assertNext(ts, "^_hello_world", 1, 10.099504f, 0, 10);
-    assertNext(ts, "^_hello_world_$", 1, 12.328828f, 0, 10);
-    assertNext(ts, "hello", 1, 1.0f, 0, 4);
-    assertNext(ts, "hello_world", 1, 1.4142135f, 0, 10);
-    assertNext(ts, "hello_world_$", 1, 7.2111025f, 0, 10);
-    assertNext(ts, "world", 1, 1.0f, 5, 10);
-    assertNext(ts, "world_$", 1, 7.1414285f, 5, 10);
-    assertNext(ts, "^_greetings", 1, 10.049875f, 0, 4);
-    assertNext(ts, "^_greetings_world", 1, 10.099504f, 0, 10);
-    assertNext(ts, "^_greetings_world_$", 1, 12.328828f, 0, 10);
-    assertNext(ts, "greetings", 1, 1.0f, 0, 4);
-    assertNext(ts, "greetings_world", 1, 1.4142135f, 0, 10);
-    assertNext(ts, "greetings_world_$", 1, 7.2111025f, 0, 10);
-    assertNext(ts, "^_hello_earth", 1, 10.099504f, 0, 10);
-    assertNext(ts, "^_hello_earth_$", 1, 12.328828f, 0, 10);
-    assertNext(ts, "hello_earth", 1, 1.4142135f, 0, 10);
-    assertNext(ts, "hello_earth_$", 1, 7.2111025f, 0, 10);
-    assertNext(ts, "earth", 1, 1.0f, 5, 10);
-    assertNext(ts, "earth_$", 1, 7.1414285f, 5, 10);
-    assertNext(ts, "^_greetings_earth", 1, 10.099504f, 0, 10);
-    assertNext(ts, "^_greetings_earth_$", 1, 12.328828f, 0, 10);
-    assertNext(ts, "greetings_earth", 1, 1.4142135f, 0, 10);
-    assertNext(ts, "greetings_earth_$", 1, 7.2111025f, 0, 10);
-    assertNext(ts, "^_hello_tellus", 1, 10.099504f, 0, 10);
-    assertNext(ts, "^_hello_tellus_$", 1, 12.328828f, 0, 10);
-    assertNext(ts, "hello_tellus", 1, 1.4142135f, 0, 10);
-    assertNext(ts, "hello_tellus_$", 1, 7.2111025f, 0, 10);
-    assertNext(ts, "tellus", 1, 1.0f, 5, 10);
-    assertNext(ts, "tellus_$", 1, 7.1414285f, 5, 10);
-    assertNext(ts, "^_greetings_tellus", 1, 10.099504f, 0, 10);
-    assertNext(ts, "^_greetings_tellus_$", 1, 12.328828f, 0, 10);
-    assertNext(ts, "greetings_tellus", 1, 1.4142135f, 0, 10);
-    assertNext(ts, "greetings_tellus_$", 1, 7.2111025f, 0, 10);
+    assertNext(ts, reusableToken, "^_hello", 1, 10.049875f, 0, 4);
+    assertNext(ts, reusableToken, "^_hello_world", 1, 10.099504f, 0, 10);
+    assertNext(ts, reusableToken, "^_hello_world_$", 1, 12.328828f, 0, 10);
+    assertNext(ts, reusableToken, "hello", 1, 1.0f, 0, 4);
+    assertNext(ts, reusableToken, "hello_world", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "hello_world_$", 1, 7.2111025f, 0, 10);
+    assertNext(ts, reusableToken, "world", 1, 1.0f, 5, 10);
+    assertNext(ts, reusableToken, "world_$", 1, 7.1414285f, 5, 10);
+    assertNext(ts, reusableToken, "^_greetings", 1, 10.049875f, 0, 4);
+    assertNext(ts, reusableToken, "^_greetings_world", 1, 10.099504f, 0, 10);
+    assertNext(ts, reusableToken, "^_greetings_world_$", 1, 12.328828f, 0, 10);
+    assertNext(ts, reusableToken, "greetings", 1, 1.0f, 0, 4);
+    assertNext(ts, reusableToken, "greetings_world", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "greetings_world_$", 1, 7.2111025f, 0, 10);
+    assertNext(ts, reusableToken, "^_hello_earth", 1, 10.099504f, 0, 10);
+    assertNext(ts, reusableToken, "^_hello_earth_$", 1, 12.328828f, 0, 10);
+    assertNext(ts, reusableToken, "hello_earth", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "hello_earth_$", 1, 7.2111025f, 0, 10);
+    assertNext(ts, reusableToken, "earth", 1, 1.0f, 5, 10);
+    assertNext(ts, reusableToken, "earth_$", 1, 7.1414285f, 5, 10);
+    assertNext(ts, reusableToken, "^_greetings_earth", 1, 10.099504f, 0, 10);
+    assertNext(ts, reusableToken, "^_greetings_earth_$", 1, 12.328828f, 0, 10);
+    assertNext(ts, reusableToken, "greetings_earth", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "greetings_earth_$", 1, 7.2111025f, 0, 10);
+    assertNext(ts, reusableToken, "^_hello_tellus", 1, 10.099504f, 0, 10);
+    assertNext(ts, reusableToken, "^_hello_tellus_$", 1, 12.328828f, 0, 10);
+    assertNext(ts, reusableToken, "hello_tellus", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "hello_tellus_$", 1, 7.2111025f, 0, 10);
+    assertNext(ts, reusableToken, "tellus", 1, 1.0f, 5, 10);
+    assertNext(ts, reusableToken, "tellus_$", 1, 7.1414285f, 5, 10);
+    assertNext(ts, reusableToken, "^_greetings_tellus", 1, 10.099504f, 0, 10);
+    assertNext(ts, reusableToken, "^_greetings_tellus_$", 1, 12.328828f, 0, 10);
+    assertNext(ts, reusableToken, "greetings_tellus", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "greetings_tellus_$", 1, 7.2111025f, 0, 10);

-    assertNull(ts.next());
+    assertNull(ts.next(reusableToken));
     System.currentTimeMillis();

@@ -300,27 +294,27 @@
     ts = new ShingleMatrixFilter(tls, 2, 3, '_', false);

-//    while ((token = ts.next(token)) != null) {
-//      System.out.println("assertNext(ts, \"" + token.termText() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
+//    for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
+//      System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
 //      token.clear();
 //    }

     // shingle, position increment, weight, start offset, end offset

-    assertNext(ts, "hello_world", 1, 1.4142135f, 0, 10);
-    assertNext(ts, "greetings_and", 1, 1.4142135f, 0, 4);
-    assertNext(ts, "greetings_and_salutations", 1, 1.7320508f, 0, 4);
-    assertNext(ts, "and_salutations", 1, 1.4142135f, 0, 4);
-    assertNext(ts, "and_salutations_world", 1, 1.7320508f, 0, 10);
-    assertNext(ts, "salutations_world", 1, 1.4142135f, 0, 10);
-    assertNext(ts, "hello_earth", 1, 1.4142135f, 0, 10);
-    assertNext(ts, "and_salutations_earth", 1, 1.7320508f, 0, 10);
-    assertNext(ts, "salutations_earth", 1, 1.4142135f, 0, 10);
-    assertNext(ts, "hello_tellus", 1, 1.4142135f, 0, 10);
-    assertNext(ts, "and_salutations_tellus", 1, 1.7320508f, 0, 10);
-    assertNext(ts, "salutations_tellus", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "hello_world", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "greetings_and", 1, 1.4142135f, 0, 4);
+    assertNext(ts, reusableToken, "greetings_and_salutations", 1, 1.7320508f, 0, 4);
+    assertNext(ts, reusableToken, "and_salutations", 1, 1.4142135f, 0, 4);
+    assertNext(ts, reusableToken, "and_salutations_world", 1, 1.7320508f, 0, 10);
+    assertNext(ts, reusableToken, "salutations_world", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "hello_earth", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "and_salutations_earth", 1, 1.7320508f, 0, 10);
+    assertNext(ts, reusableToken, "salutations_earth", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "hello_tellus", 1, 1.4142135f, 0, 10);
+    assertNext(ts, reusableToken, "and_salutations_tellus", 1, 1.7320508f, 0, 10);
+    assertNext(ts, reusableToken, "salutations_tellus", 1, 1.4142135f, 0, 10);

-    assertNull(ts.next());
+    assertNull(ts.next(reusableToken));

     System.currentTimeMillis();

@@ -361,53 +355,53 @@
     TokenStream ts = new ShingleMatrixFilter(matrix, 2, 4, '_', true, new ShingleMatrixFilter.SimpleThreeDimensionalTokenSettingsCodec());

-//    Token token = new Token();
-//    while ((token = ts.next(token)) != null) {
-//      System.out.println("assertNext(ts, \"" + token.termText() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
+//    for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
+//      System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
 //      token.clear();
 //    }

-    assertNext(ts, "no_surprise", 1, 1.4142135f, 0, 0);
-    assertNext(ts, "no_surprise_to", 1, 1.7320508f, 0, 0);
-    assertNext(ts, "no_surprise_to_see", 1, 2.0f, 0, 0);
-    assertNext(ts, "surprise_to", 1, 1.4142135f, 0, 0);
-    assertNext(ts, "surprise_to_see", 1, 1.7320508f, 0, 0);
-    assertNext(ts, "surprise_to_see_england", 1, 2.0f, 0, 0);
-    assertNext(ts, "to_see", 1, 1.4142135f, 0, 0);
-    assertNext(ts, "to_see_england", 1, 1.7320508f, 0, 0);
-    assertNext(ts, "to_see_england_manager", 1, 2.0f, 0, 0);
-    assertNext(ts, "see_england", 1, 1.4142135f, 0, 0);
-    assertNext(ts, "see_england_manager", 1, 1.7320508f, 0, 0);
-    assertNext(ts, "see_england_manager_svennis", 1, 2.0f, 0, 0);
-    assertNext(ts, "england_manager", 1, 1.4142135f, 0, 0);
-    assertNext(ts, "england_manager_svennis", 1, 1.7320508f, 0, 0);
-    assertNext(ts, "england_manager_svennis_in", 1, 2.0f, 0, 0);
-    assertNext(ts, "manager_svennis", 1, 1.4142135f, 0, 0);
-    assertNext(ts, "manager_svennis_in", 1, 1.7320508f, 0, 0);
-    assertNext(ts, "manager_svennis_in_the", 1, 2.0f, 0, 0);
-    assertNext(ts, "svennis_in", 1, 1.4142135f, 0, 0);
-    assertNext(ts, "svennis_in_the", 1, 1.7320508f, 0, 0);
-    assertNext(ts, "svennis_in_the_croud", 1, 2.0f, 0, 0);
-    assertNext(ts, "in_the", 1, 1.4142135f, 0, 0);
-    assertNext(ts, "in_the_croud", 1, 1.7320508f, 0, 0);
-    assertNext(ts, "the_croud", 1, 1.4142135f, 0, 0);
-    assertNext(ts, "see_england_manager_sven", 1, 2.0f, 0, 0);
-    assertNext(ts, "england_manager_sven", 1, 1.7320508f, 0, 0);
-    assertNext(ts, "england_manager_sven_göran", 1, 2.0f, 0, 0);
-    assertNext(ts, "manager_sven", 1, 1.4142135f, 0, 0);
-    assertNext(ts, "manager_sven_göran", 1, 1.7320508f, 0, 0);
-    assertNext(ts, "manager_sven_göran_eriksson", 1, 2.0f, 0, 0);
-    assertNext(ts, "sven_göran", 1, 1.4142135f, 0, 0);
-    assertNext(ts, "sven_göran_eriksson", 1, 1.7320508f, 0, 0);
-    assertNext(ts, "sven_göran_eriksson_in", 1, 2.0f, 0, 0);
-    assertNext(ts, "göran_eriksson", 1, 1.4142135f, 0, 0);
-    assertNext(ts, "göran_eriksson_in", 1, 1.7320508f, 0, 0);
-    assertNext(ts, "göran_eriksson_in_the", 1, 2.0f, 0, 0);
-    assertNext(ts, "eriksson_in", 1, 1.4142135f, 0, 0);
-    assertNext(ts, "eriksson_in_the", 1, 1.7320508f, 0, 0);
-    assertNext(ts, "eriksson_in_the_croud", 1, 2.0f, 0, 0);
+    final Token reusableToken = new Token();
+    assertNext(ts, reusableToken, "no_surprise", 1, 1.4142135f, 0, 0);
+    assertNext(ts, reusableToken, "no_surprise_to", 1, 1.7320508f, 0, 0);
+    assertNext(ts, reusableToken, "no_surprise_to_see", 1, 2.0f, 0, 0);
+    assertNext(ts, reusableToken, "surprise_to", 1, 1.4142135f, 0, 0);
1.7320508f, 0, 0); + assertNext(ts, reusableToken, "surprise_to_see_england", 1, 2.0f, 0, 0); + assertNext(ts, reusableToken, "to_see", 1, 1.4142135f, 0, 0); + assertNext(ts, reusableToken, "to_see_england", 1, 1.7320508f, 0, 0); + assertNext(ts, reusableToken, "to_see_england_manager", 1, 2.0f, 0, 0); + assertNext(ts, reusableToken, "see_england", 1, 1.4142135f, 0, 0); + assertNext(ts, reusableToken, "see_england_manager", 1, 1.7320508f, 0, 0); + assertNext(ts, reusableToken, "see_england_manager_svennis", 1, 2.0f, 0, 0); + assertNext(ts, reusableToken, "england_manager", 1, 1.4142135f, 0, 0); + assertNext(ts, reusableToken, "england_manager_svennis", 1, 1.7320508f, 0, 0); + assertNext(ts, reusableToken, "england_manager_svennis_in", 1, 2.0f, 0, 0); + assertNext(ts, reusableToken, "manager_svennis", 1, 1.4142135f, 0, 0); + assertNext(ts, reusableToken, "manager_svennis_in", 1, 1.7320508f, 0, 0); + assertNext(ts, reusableToken, "manager_svennis_in_the", 1, 2.0f, 0, 0); + assertNext(ts, reusableToken, "svennis_in", 1, 1.4142135f, 0, 0); + assertNext(ts, reusableToken, "svennis_in_the", 1, 1.7320508f, 0, 0); + assertNext(ts, reusableToken, "svennis_in_the_croud", 1, 2.0f, 0, 0); + assertNext(ts, reusableToken, "in_the", 1, 1.4142135f, 0, 0); + assertNext(ts, reusableToken, "in_the_croud", 1, 1.7320508f, 0, 0); + assertNext(ts, reusableToken, "the_croud", 1, 1.4142135f, 0, 0); + assertNext(ts, reusableToken, "see_england_manager_sven", 1, 2.0f, 0, 0); + assertNext(ts, reusableToken, "england_manager_sven", 1, 1.7320508f, 0, 0); + assertNext(ts, reusableToken, "england_manager_sven_göran", 1, 2.0f, 0, 0); + assertNext(ts, reusableToken, "manager_sven", 1, 1.4142135f, 0, 0); + assertNext(ts, reusableToken, "manager_sven_göran", 1, 1.7320508f, 0, 0); + assertNext(ts, reusableToken, "manager_sven_göran_eriksson", 1, 2.0f, 0, 0); + assertNext(ts, reusableToken, "sven_göran", 1, 1.4142135f, 0, 0); + assertNext(ts, reusableToken, "sven_göran_eriksson", 1, 1.7320508f, 0, 0); + assertNext(ts, reusableToken, "sven_göran_eriksson_in", 1, 2.0f, 0, 0); + assertNext(ts, reusableToken, "göran_eriksson", 1, 1.4142135f, 0, 0); + assertNext(ts, reusableToken, "göran_eriksson_in", 1, 1.7320508f, 0, 0); + assertNext(ts, reusableToken, "göran_eriksson_in_the", 1, 2.0f, 0, 0); + assertNext(ts, reusableToken, "eriksson_in", 1, 1.4142135f, 0, 0); + assertNext(ts, reusableToken, "eriksson_in_the", 1, 1.7320508f, 0, 0); + assertNext(ts, reusableToken, "eriksson_in_the_croud", 1, 2.0f, 0, 0); - assertNull(ts.next()); + assertNull(ts.next(reusableToken)); } @@ -417,11 +411,9 @@ private Token tokenFactory(String text, int posIncr, int startOffset, int endOffset) { - Token token = new Token(); - token.setTermText(text); + Token token = new Token(startOffset, endOffset); + token.setTermBuffer(text); token.setPositionIncrement(posIncr); - token.setStartOffset(startOffset); - token.setEndOffset(endOffset); return token; } @@ -435,61 +427,64 @@ } private Token tokenFactory(String text, int posIncr, float weight, int startOffset, int endOffset) { - Token token = new Token(); - token.setTermText(text); + Token token = new Token(startOffset, endOffset); + token.setTermBuffer(text); token.setPositionIncrement(posIncr); ShingleMatrixFilter.defaultSettingsCodec.setWeight(token, weight); - token.setStartOffset(startOffset); - token.setEndOffset(endOffset); return token; } private Token tokenFactory(String text, int posIncr, float weight, int startOffset, int endOffset, ShingleMatrixFilter.TokenPositioner positioner) { - Token token 
= new Token(); - token.setTermText(text); + Token token = new Token(startOffset, endOffset); + token.setTermBuffer(text); token.setPositionIncrement(posIncr); ShingleMatrixFilter.defaultSettingsCodec.setWeight(token, weight); - token.setStartOffset(startOffset); - token.setEndOffset(endOffset); ShingleMatrixFilter.defaultSettingsCodec.setTokenPositioner(token, positioner); return token; } // assert-methods start here - private Token assertNext(TokenStream ts, String text) throws IOException { - Token token = ts.next(new Token()); - assertNotNull(token); - assertEquals(text, new String(token.termBuffer(), 0, token.termLength())); - return token; - } - - private Token assertNext(TokenStream ts, String text, int positionIncrement, float boost) throws IOException { - Token token = ts.next(new Token()); - assertNotNull(token); - assertEquals(text, new String(token.termBuffer(), 0, token.termLength())); - assertEquals(positionIncrement, token.getPositionIncrement()); - assertEquals(boost, token.getPayload() == null ? 1f : PayloadHelper.decodeFloat(token.getPayload().getData())); - return token; - } - - private Token assertNext(TokenStream ts, String text, int positionIncrement, float boost, int startOffset, int endOffset) throws IOException { - Token token = ts.next(new Token()); - assertNotNull(token); - assertEquals(text, new String(token.termBuffer(), 0, token.termLength())); - assertEquals(positionIncrement, token.getPositionIncrement()); - assertEquals(boost, token.getPayload() == null ? 1f : PayloadHelper.decodeFloat(token.getPayload().getData())); - assertEquals(startOffset, token.startOffset()); - assertEquals(endOffset, token.endOffset()); - return token; - } - - private Token assertNext(TokenStream ts, String text, int startOffset, int endOffset) throws IOException { - Token token = ts.next(new Token()); - assertNotNull(token); - assertEquals(text, new String(token.termBuffer(), 0, token.termLength())); - assertEquals(startOffset, token.startOffset()); - assertEquals(endOffset, token.endOffset()); + private Token assertNext(TokenStream ts, final Token reusableToken, String text) throws IOException { + Token nextToken = ts.next(reusableToken); + assertNotNull(nextToken); + assertEquals(text, nextToken.term()); + return nextToken; + } + + private Token assertNext(TokenStream ts, final Token reusableToken, String text, int positionIncrement, float boost) throws IOException { + Token nextToken = ts.next(reusableToken); + assertNotNull(nextToken); + assertEquals(text, nextToken.term()); + assertEquals(positionIncrement, nextToken.getPositionIncrement()); + assertEquals(boost, nextToken.getPayload() == null ? 1f : PayloadHelper.decodeFloat(nextToken.getPayload().getData())); + return nextToken; + } + + private Token assertNext(TokenStream ts, final Token reusableToken, String text, int positionIncrement, float boost, int startOffset, int endOffset) throws IOException { + Token nextToken = ts.next(reusableToken); + assertNotNull(nextToken); + assertEquals(text, nextToken.term()); + assertEquals(positionIncrement, nextToken.getPositionIncrement()); + assertEquals(boost, nextToken.getPayload() == null ? 
1f : PayloadHelper.decodeFloat(nextToken.getPayload().getData())); + assertEquals(startOffset, nextToken.startOffset()); + assertEquals(endOffset, nextToken.endOffset()); + return nextToken; + } + + private Token assertNext(TokenStream ts, final Token reusableToken, String text, int startOffset, int endOffset) throws IOException { + Token nextToken = ts.next(reusableToken); + assertNotNull(nextToken); + assertEquals(text, nextToken.term()); + assertEquals(startOffset, nextToken.startOffset()); + assertEquals(endOffset, nextToken.endOffset()); + return nextToken; + } + + private static Token createToken(String term, int start, int offset) + { + Token token = new Token(start, offset); + token.setTermBuffer(term); return token; } @@ -500,9 +495,9 @@ public TokenListStream(TokenStream ts) throws IOException { tokens = new ArrayList(); - Token token; - while ((token = ts.next(new Token())) != null) { - tokens.add(token); + final Token reusableToken = new Token(); + for (Token nextToken = ts.next(reusableToken); nextToken != null; nextToken = ts.next(reusableToken)) { + tokens.add((Token) nextToken.clone()); } } @@ -512,14 +507,16 @@ private Iterator iterator; - public Token next() throws IOException { + public Token next(final Token reusableToken) throws IOException { + assert reusableToken != null; if (iterator == null) { iterator = tokens.iterator(); } if (!iterator.hasNext()) { return null; } - return iterator.next(); + Token nextToken = (Token) iterator.next(); + return (Token) nextToken.clone(); } Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java (original) +++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java Wed Aug 20 07:38:07 2008 @@ -43,13 +43,13 @@ DateRecognizerSinkTokenizer sink = new DateRecognizerSinkTokenizer(new SimpleDateFormat("MM/dd/yyyy")); String test = "The quick red fox jumped over the lazy brown dogs on 7/11/2006 The dogs finally reacted on 7/12/2006"; TeeTokenFilter tee = new TeeTokenFilter(new WhitespaceTokenizer(new StringReader(test)), sink); - Token tok = null; int count = 0; - while ((tok = tee.next()) != null){ - assertTrue("tok is null and it shouldn't be", tok != null); - if (tok.termBuffer()[0] == '7'){ - assertTrue(tok.type() + " is not equal to " + DateRecognizerSinkTokenizer.DATE_TYPE, - tok.type().equals(DateRecognizerSinkTokenizer.DATE_TYPE) == true); + final Token reusableToken = new Token(); + for (Token nextToken = tee.next(reusableToken); nextToken != null; nextToken = tee.next(reusableToken)) { + assertTrue("nextToken is null and it shouldn't be", nextToken != null); + if (nextToken.termBuffer()[0] == '7'){ + assertTrue(nextToken.type() + " is not equal to " + DateRecognizerSinkTokenizer.DATE_TYPE, + nextToken.type().equals(DateRecognizerSinkTokenizer.DATE_TYPE) == true); } count++; } Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java URL: 
http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java (original) +++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java Wed Aug 20 07:38:07 2008 @@ -42,10 +42,10 @@ TokenRangeSinkTokenizer rangeToks = new TokenRangeSinkTokenizer(2, 4); String test = "The quick red fox jumped over the lazy brown dogs"; TeeTokenFilter tee = new TeeTokenFilter(new WhitespaceTokenizer(new StringReader(test)), rangeToks); - Token tok = null; int count = 0; - while ((tok = tee.next()) != null){ - assertTrue("tok is null and it shouldn't be", tok != null); + final Token reusableToken = new Token(); + for (Token nextToken = tee.next(reusableToken); nextToken != null; nextToken = tee.next(reusableToken)) { + assertTrue("nextToken is null and it shouldn't be", nextToken != null); count++; } assertTrue(count + " does not equal: " + 10, count == 10); Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java (original) +++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java Wed Aug 20 07:38:07 2008 @@ -16,13 +16,17 @@ * limitations under the License. 
*/ -import junit.framework.TestCase; -import org.apache.lucene.analysis.*; -import org.apache.lucene.analysis.payloads.NumericPayloadTokenFilter; - import java.io.IOException; import java.io.StringReader; +import junit.framework.TestCase; + +import org.apache.lucene.analysis.TeeTokenFilter; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.WhitespaceTokenizer; + public class TokenTypeSinkTokenizerTest extends TestCase { @@ -42,14 +46,14 @@ String test = "The quick red fox jumped over the lazy brown dogs"; TeeTokenFilter ttf = new TeeTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test))), sink); - Token tok = new Token(); boolean seenDogs = false; - while ((tok = ttf.next(tok)) != null) { - if (tok.termText().equals("dogs")) { + final Token reusableToken = new Token(); + for (Token nextToken = ttf.next(reusableToken); nextToken != null; nextToken = ttf.next(reusableToken)) { + if (nextToken.term().equals("dogs")) { seenDogs = true; - assertTrue(tok.type() + " is not equal to " + "D", tok.type().equals("D") == true); + assertTrue(nextToken.type() + " is not equal to " + "D", nextToken.type().equals("D") == true); } else { - assertTrue(tok.type() + " is not null and it should be", tok.type().equals("word")); + assertTrue(nextToken.type() + " is not null and it should be", nextToken.type().equals("word")); } } assertTrue(seenDogs + " does not equal: " + true, seenDogs == true); @@ -61,12 +65,13 @@ super(input); } - public Token next(Token result) throws IOException { - result = input.next(result); - if (result != null && result.termText().equals("dogs")) { - result.setType("D"); + public Token next(final Token reusableToken) throws IOException { + assert reusableToken != null; + Token nextToken = input.next(reusableToken); + if (nextToken != null && nextToken.term().equals("dogs")) { + nextToken.setType("D"); } - return result; + return nextToken; } } } \ No newline at end of file Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java (original) +++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java Wed Aug 20 07:38:07 2008 @@ -36,13 +36,13 @@ throws Exception { TokenStream ts = a.tokenStream("dummy", new StringReader(input)); - + final Token reusableToken = new Token(); for (int i = 0; i < output.length; i++) { - Token t = ts.next(); - assertNotNull(t); - assertEquals(t.termText(), output[i]); + Token nextToken = ts.next(reusableToken); + assertNotNull(nextToken); + assertEquals(nextToken.term(), output[i]); } - assertNull(ts.next()); + assertNull(ts.next(reusableToken)); ts.close(); } Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- 
lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java (original) +++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java Wed Aug 20 07:38:07 2008 @@ -22,6 +22,7 @@ import java.util.Iterator; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.util.PriorityQueue; @@ -217,7 +218,7 @@ try { - org.apache.lucene.analysis.Token token; + final Token reusableToken = new Token(); String tokenText; int startOffset; int endOffset; @@ -225,10 +226,12 @@ textFragmenter.start(text); TokenGroup tokenGroup=new TokenGroup(); - token = tokenStream.next(); - while ((token!= null)&&(token.startOffset()< maxDocCharsToAnalyze)) + + for (Token nextToken = tokenStream.next(reusableToken); + (nextToken!= null)&&(nextToken.startOffset()< maxDocCharsToAnalyze); + nextToken = tokenStream.next(reusableToken)) { - if((tokenGroup.numTokens>0)&&(tokenGroup.isDistinct(token))) + if((tokenGroup.numTokens>0)&&(tokenGroup.isDistinct(nextToken))) { //the current token is distinct from previous tokens - // markup the cached token group info @@ -244,7 +247,7 @@ tokenGroup.clear(); //check if current token marks the start of a new fragment - if(textFragmenter.isNewFragment(token)) + if(textFragmenter.isNewFragment(nextToken)) { currentFrag.setScore(fragmentScorer.getFragmentScore()); //record stats for a new fragment @@ -255,13 +258,12 @@ } } - tokenGroup.addToken(token,fragmentScorer.getTokenScore(token)); + tokenGroup.addToken(nextToken,fragmentScorer.getTokenScore(nextToken)); // if(lastEndOffset>maxDocBytesToAnalyze) // { // break; // } - token = tokenStream.next(); } currentFrag.setScore(fragmentScorer.getFragmentScore()); Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java (original) +++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java Wed Aug 20 07:38:07 2008 @@ -106,7 +106,7 @@ */ public float getTokenScore(Token token) { - String termText=token.termText(); + String termText=token.term(); WeightedTerm queryTerm=(WeightedTerm) termsToFind.get(termText); if(queryTerm==null) Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java (original) +++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java Wed Aug 20 07:38:07 2008 @@ -62,7 +62,7 @@ return false; } - WeightedSpanTerm wSpanTerm = spanScorer.getWeightedSpanTerm(new String(token.termBuffer(), 0, token.termLength())); + WeightedSpanTerm wSpanTerm = spanScorer.getWeightedSpanTerm(token.term()); if (wSpanTerm != null) { List positionSpans = 
wSpanTerm.getPositionSpans(); Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SpanScorer.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SpanScorer.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SpanScorer.java (original) +++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SpanScorer.java Wed Aug 20 07:38:07 2008 @@ -121,7 +121,7 @@ */ public float getTokenScore(Token token) { position += token.getPositionIncrement(); - String termText = new String(token.termBuffer(), 0, token.termLength()); + String termText = token.term(); WeightedSpanTerm weightedSpanTerm; Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java (original) +++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java Wed Aug 20 07:38:07 2008 @@ -61,7 +61,7 @@ tot+=score; } } - tokens[numTokens]=token; + tokens[numTokens]= (Token) token.clone(); scores[numTokens]=score; numTokens++; } Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (original) +++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java Wed Aug 20 07:38:07 2008 @@ -147,8 +147,9 @@ { this.tokens=tokens; } - public Token next() + public Token next(final Token reusableToken) { + assert reusableToken != null; if(currentToken>=tokens.length) { return null; @@ -160,6 +161,7 @@ String[] terms=tpv.getTerms(); int[] freq=tpv.getTermFrequencies(); int totalTokens=0; + Token newToken = new Token(); for (int t = 0; t < freq.length; t++) { totalTokens+=freq[t]; @@ -189,9 +191,8 @@ } for (int tp = 0; tp < offsets.length; tp++) { - unsortedTokens.add(new Token(terms[t], - offsets[tp].getStartOffset(), - offsets[tp].getEndOffset())); + newToken.reinit(terms[t], offsets[tp].getStartOffset(), offsets[tp].getEndOffset()); + unsortedTokens.add(newToken.clone()); } } else @@ -204,9 +205,8 @@ //tokens stored with positions - can use this to index straight into sorted array for (int tp = 0; tp < pos.length; tp++) { - tokensInOriginalOrder[pos[tp]]=new Token(terms[t], - offsets[tp].getStartOffset(), - offsets[tp].getEndOffset()); + newToken.reinit(terms[t], offsets[tp].getStartOffset(), offsets[tp].getEndOffset()); + tokensInOriginalOrder[pos[tp]] = (Token) newToken.clone(); } } } @@ -261,7 +261,7 @@ } return getTokenStream(field, contents, analyzer); } - //conevenience method + //convenience method public static TokenStream getTokenStream(String field, String contents, Analyzer 
analyzer){ return analyzer.tokenStream(field,new StringReader(contents)); }
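The pattern applied throughout this patch is the reusable-token contract of TokenStream.next(Token): the caller allocates a single Token and passes it to every next(Token) call; the producer may fill in and return that instance, or return a different (e.g. cached) Token, so a consumer that keeps tokens across calls must clone() them, exactly as TokenGroup and TokenListStream now do. A minimal consumer sketch under that contract, assuming only the Lucene 2.4-era org.apache.lucene.analysis API used above (the class and method names below are illustrative, not part of this commit):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;

// Illustrative only: the consumer side of the next(Token) reuse idiom.
class ReusableTokenConsumer {
  // Drains a TokenStream into a list, reusing one Token for every call and
  // cloning whatever the stream hands back before caching it, because the
  // returned instance may be overwritten by the following next(...) call.
  static List drain(TokenStream ts) throws IOException {
    List tokens = new ArrayList();
    final Token reusableToken = new Token();
    for (Token nextToken = ts.next(reusableToken); nextToken != null;
         nextToken = ts.next(reusableToken)) {
      tokens.add(nextToken.clone());
    }
    return tokens;
  }
}

Note that the loop treats the token returned by next(Token), not the one passed in, as authoritative, and that a null return marks the end of the stream; both points are what the rewritten tests above rely on.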