Subject: svn commit: r687357 [3/6] - in /lucene/java/trunk: ./ contrib/analyzers/src/java/org/apache/lucene/analysis/br/ contrib/analyzers/src/java/org/apache/lucene/analysis/cjk/ contrib/analyzers/src/java/org/apache/lucene/analysis/cn/ contrib/analyzers/src/j...
Date: Wed, 20 Aug 2008 14:38:11 -0000
To: java-commits@lucene.apache.org
From: mikemccand@apache.org
Message-Id: <20080820143817.4EB542388A15@eris.apache.org>

Modified: lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (original) +++ lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java Wed Aug 20 07:38:07 2008 @@ -1127,21 +1127,22 @@ { lst = new ArrayList(); Token t; - t = new Token("hi", 0, 2); + t = createToken("hi", 0, 2); lst.add(t); - t = new Token("hispeed", 0, 8); + t = createToken("hispeed", 0, 8); lst.add(t); - t = new Token("speed", 3, 8); + t = createToken("speed", 3, 8); t.setPositionIncrement(0); lst.add(t); - t = new Token("10", 8, 10); + t = createToken("10", 8, 10); lst.add(t); - t = new Token("foo", 11, 14); + t = createToken("foo", 11, 14); lst.add(t); iter = lst.iterator(); } - public Token next() throws IOException { + public Token next(final Token reusableToken) throws IOException { + assert reusableToken != null; return iter.hasNext() ?
(Token) iter.next() : null; } }; @@ -1156,21 +1157,22 @@ { lst = new ArrayList(); Token t; - t = new Token("hispeed", 0, 8); + t = createToken("hispeed", 0, 8); lst.add(t); - t = new Token("hi", 0, 2); + t = createToken("hi", 0, 2); t.setPositionIncrement(0); lst.add(t); - t = new Token("speed", 3, 8); + t = createToken("speed", 3, 8); lst.add(t); - t = new Token("10", 8, 10); + t = createToken("10", 8, 10); lst.add(t); - t = new Token("foo", 11, 14); + t = createToken("foo", 11, 14); lst.add(t); iter = lst.iterator(); } - public Token next() throws IOException { + public Token next(final Token reusableToken) throws IOException { + assert reusableToken != null; return iter.hasNext() ? (Token) iter.next() : null; } }; @@ -1407,6 +1409,13 @@ super.tearDown(); } + private static Token createToken(String term, int start, int offset) + { + Token token = new Token(start, offset); + token.setTermBuffer(term); + return token; + } + } // =================================================================== @@ -1453,31 +1462,32 @@ this.synonyms = synonyms; } - public Token next() throws IOException { + public Token next(final Token reusableToken) throws IOException { + assert reusableToken != null; if (currentRealToken == null) { - Token nextRealToken = realStream.next(); + Token nextRealToken = realStream.next(reusableToken); if (nextRealToken == null) { return null; } - String expansions = (String) synonyms.get(nextRealToken.termText()); + String expansions = (String) synonyms.get(nextRealToken.term()); if (expansions == null) { return nextRealToken; } st = new StringTokenizer(expansions, ","); if (st.hasMoreTokens()) { - currentRealToken = nextRealToken; + currentRealToken = (Token) nextRealToken.clone(); } return currentRealToken; } else { - String nextExpandedValue = st.nextToken(); - Token expandedToken = new Token(nextExpandedValue, currentRealToken.startOffset(), - currentRealToken.endOffset()); - expandedToken.setPositionIncrement(0); + reusableToken.reinit(st.nextToken(), + currentRealToken.startOffset(), + currentRealToken.endOffset()); + reusableToken.setPositionIncrement(0); if (!st.hasMoreTokens()) { currentRealToken = null; st = null; } - return expandedToken; + return reusableToken; } } Modified: lucene/java/trunk/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java (original) +++ lucene/java/trunk/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java Wed Aug 20 07:38:07 2008 @@ -520,12 +520,10 @@ } else { tokenStream = analyzer.tokenStream(field.name(), new StringReader(field.stringValue())); } - Token next = tokenStream.next(); - while (next != null) { - next.setTermText(next.termText().intern()); // todo: not sure this needs to be interned? - tokens.add(next); // the vector will be built on commit. - next = tokenStream.next(); + final Token reusableToken = new Token(); + for (Token nextToken = tokenStream.next(reusableToken); nextToken != null; nextToken = tokenStream.next(reusableToken)) { + tokens.add((Token) nextToken.clone()); // the vector will be built on commit. 
fieldSetting.fieldLength++; if (fieldSetting.fieldLength > maxFieldLength) { break; @@ -533,7 +531,10 @@ } } else { // untokenized - tokens.add(new Token(field.stringValue().intern(), 0, field.stringValue().length(), "untokenized")); + String fieldVal = field.stringValue(); + Token token = new Token(0, fieldVal.length(), "untokenized"); + token.setTermBuffer(fieldVal); + tokens.add(token); fieldSetting.fieldLength++; } } @@ -567,10 +568,10 @@ for (Token token : eField_Tokens.getValue()) { - TermDocumentInformationFactory termDocumentInformationFactory = termDocumentInformationFactoryByTermText.get(token.termText()); + TermDocumentInformationFactory termDocumentInformationFactory = termDocumentInformationFactoryByTermText.get(token.term()); if (termDocumentInformationFactory == null) { termDocumentInformationFactory = new TermDocumentInformationFactory(); - termDocumentInformationFactoryByTermText.put(token.termText(), termDocumentInformationFactory); + termDocumentInformationFactoryByTermText.put(token.term(), termDocumentInformationFactory); } //termDocumentInformationFactory.termFrequency++; Modified: lucene/java/trunk/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java (original) +++ lucene/java/trunk/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java Wed Aug 20 07:38:07 2008 @@ -15,19 +15,32 @@ * limitations under the License. */ +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; + import junit.framework.TestCase; + import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.index.*; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Payload; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermDocs; +import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.TermFreqVector; +import org.apache.lucene.index.TermPositionVector; +import org.apache.lucene.index.TermPositions; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; -import java.io.IOException; -import java.util.*; - /** * Asserts equality of content and behaviour of two index readers. 
*/ @@ -151,21 +164,24 @@ document.add(f); if (i > 4) { final List tokens = new ArrayList(2); - Token t = new Token("the", 0, 2, "text"); + Token t = createToken("the", 0, 2, "text"); t.setPayload(new Payload(new byte[]{1, 2, 3})); tokens.add(t); - t = new Token("end", 3, 5, "text"); + t = createToken("end", 3, 5, "text"); t.setPayload(new Payload(new byte[]{2})); tokens.add(t); - tokens.add(new Token("fin", 7, 9)); + tokens.add(createToken("fin", 7, 9)); document.add(new Field("f", new TokenStream() { Iterator it = tokens.iterator(); - public Token next() throws IOException { + public Token next(final Token reusableToken) throws IOException { + assert reusableToken != null; if (!it.hasNext()) { return null; } - return it.next(); + // Resettable token streams need to return clones. + Token nextToken = (Token) it.next(); + return (Token) nextToken.clone(); } public void reset() throws IOException { @@ -466,4 +482,19 @@ testReader.close(); } + private static Token createToken(String term, int start, int offset) + { + Token token = new Token(start, offset); + token.setTermBuffer(term); + return token; + } + + private static Token createToken(String term, int start, int offset, String type) + { + Token token = new Token(start, offset, type); + token.setTermBuffer(term); + return token; + } + + } Modified: lucene/java/trunk/contrib/lucli/src/java/lucli/LuceneMethods.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/lucli/src/java/lucli/LuceneMethods.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/lucli/src/java/lucli/LuceneMethods.java (original) +++ lucene/java/trunk/contrib/lucli/src/java/lucli/LuceneMethods.java Wed Aug 20 07:38:07 2008 @@ -279,6 +279,7 @@ Analyzer analyzer = new StandardAnalyzer(); Enumeration fields = doc.fields(); + final Token reusableToken = new Token(); while (fields.hasMoreElements()) { Field field = (Field) fields.nextElement(); String fieldName = field.name(); @@ -299,10 +300,10 @@ // Tokenize field and add to postingTable TokenStream stream = analyzer.tokenStream(fieldName, reader); try { - for (Token t = stream.next(); t != null; t = stream.next()) { - position += (t.getPositionIncrement() - 1); + for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) { + position += (nextToken.getPositionIncrement() - 1); position++; - String name = t.termText(); + String name = nextToken.term(); Integer Count = (Integer) tokenHash.get(name); if (Count == null) { // not in there yet tokenHash.put(name, new Integer(1)); //first one Modified: lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java (original) +++ lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java Wed Aug 20 07:38:07 2008 @@ -73,10 +73,11 @@ return new TokenFilter(child.tokenStream(fieldName, reader)) { private int position = -1; - public Token next() throws IOException { - Token token = input.next(); // from filter super class - log.println(toString(token)); - return token; + public Token next(final Token reusableToken) throws 
IOException { + assert reusableToken != null; + Token nextToken = input.next(reusableToken); // from filter super class + log.println(toString(nextToken)); + return nextToken; } private String toString(Token token) { @@ -84,7 +85,7 @@ position += token.getPositionIncrement(); return "[" + logName + ":" + position + ":" + fieldName + ":" - + token.termText() + ":" + token.startOffset() + + token.term() + ":" + token.startOffset() + "-" + token.endOffset() + ":" + token.type() + "]"; } @@ -121,8 +122,9 @@ return new TokenFilter(child.tokenStream(fieldName, reader)) { private int todo = maxTokens; - public Token next() throws IOException { - return --todo >= 0 ? input.next() : null; + public Token next(final Token reusableToken) throws IOException { + assert reusableToken != null; + return --todo >= 0 ? input.next(reusableToken) : null; } }; } @@ -239,10 +241,11 @@ final ArrayList tokens2 = new ArrayList(); TokenStream tokenStream = new TokenFilter(child.tokenStream(fieldName, reader)) { - public Token next() throws IOException { - Token token = input.next(); // from filter super class - if (token != null) tokens2.add(token); - return token; + public Token next(final Token reusableToken) throws IOException { + assert reusableToken != null; + Token nextToken = input.next(reusableToken); // from filter super class + if (nextToken != null) tokens2.add(nextToken.clone()); + return nextToken; } }; @@ -253,7 +256,8 @@ private Iterator iter = tokens.iterator(); - public Token next() { + public Token next(Token token) { + assert token != null; if (!iter.hasNext()) return null; return (Token) iter.next(); } @@ -300,12 +304,12 @@ HashMap map = new HashMap(); TokenStream stream = analyzer.tokenStream("", new StringReader(text)); try { - Token token; - while ((token = stream.next()) != null) { - MutableInteger freq = (MutableInteger) map.get(token.termText()); + final Token reusableToken = new Token(); + for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) { + MutableInteger freq = (MutableInteger) map.get(nextToken.term()); if (freq == null) { freq = new MutableInteger(1); - map.put(token.termText(), freq); + map.put(nextToken.term(), freq); } else { freq.setValue(freq.intValue() + 1); } Modified: lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original) +++ lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Wed Aug 20 07:38:07 2008 @@ -275,7 +275,8 @@ return new TokenStream() { private Iterator iter = keywords.iterator(); private int start = 0; - public Token next() { + public Token next(final Token reusableToken) { + assert reusableToken != null; if (!iter.hasNext()) return null; Object obj = iter.next(); @@ -283,9 +284,9 @@ throw new IllegalArgumentException("keyword must not be null"); String term = obj.toString(); - Token token = new Token(term, start, start + term.length()); + reusableToken.reinit(term, start, start+reusableToken.termLength()); start += term.length() + 1; // separate words by 1 (blank) character - return token; + return reusableToken; } }; } @@ -349,14 +350,13 @@ HashMap terms = new HashMap(); int numTokens = 0; 
int pos = -1; - Token token; - - while ((token = stream.next()) != null) { - String term = token.termText(); + final Token reusableToken = new Token(); + for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) { + String term = nextToken.term(); if (term.length() == 0) continue; // nothing to do // if (DEBUG) System.err.println("token='" + term + "'"); numTokens++; - pos += token.getPositionIncrement(); + pos += nextToken.getPositionIncrement(); ArrayIntList positions = (ArrayIntList) terms.get(term); if (positions == null) { // term not seen before @@ -366,7 +366,7 @@ if (stride == 1) { positions.add(pos); } else { - positions.add(pos, token.startOffset(), token.endOffset()); + positions.add(pos, nextToken.startOffset(), nextToken.endOffset()); } } Modified: lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java (original) +++ lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java Wed Aug 20 07:38:07 2008 @@ -334,7 +334,8 @@ this.toLowerCase = toLowerCase; } - public Token next() { + public Token next(final Token reusableToken) { + assert reusableToken != null; if (matcher == null) return null; while (true) { // loop takes care of leading and trailing boundary cases @@ -352,7 +353,7 @@ if (start != end) { // non-empty match (header/trailer) String text = str.substring(start, end); if (toLowerCase) text = text.toLowerCase(locale); - return new Token(text, start, end); + return reusableToken.reinit(text, start, end); } if (!isMatch) return null; } @@ -384,7 +385,8 @@ this.stopWords = stopWords; } - public Token next() { + public Token next(final Token reusableToken) { + assert reusableToken != null; // cache loop instance vars (performance) String s = str; int len = s.length(); @@ -422,7 +424,11 @@ } while (text != null && isStopWord(text)); pos = i; - return text != null ? new Token(text, start, i) : null; + if (text == null) + { + return null; + } + return reusableToken.reinit(text, start, i); } private boolean isTokenChar(char c, boolean isLetter) { Modified: lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/SynonymTokenFilter.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/SynonymTokenFilter.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/SynonymTokenFilter.java (original) +++ lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/SynonymTokenFilter.java Wed Aug 20 07:38:07 2008 @@ -68,48 +68,51 @@ } /** Returns the next token in the stream, or null at EOS. 
*/ - public Token next() throws IOException { - Token token; + public Token next(final Token reusableToken) throws IOException { + assert reusableToken != null; while (todo > 0 && index < stack.length) { // pop from stack - token = createToken(stack[index++], current); - if (token != null) { + Token nextToken = createToken(stack[index++], current, reusableToken); + if (nextToken != null) { todo--; - return token; + return nextToken; } } - token = input.next(); - if (token == null) return null; // EOS; iterator exhausted + Token nextToken = input.next(reusableToken); + if (nextToken == null) return null; // EOS; iterator exhausted - stack = synonyms.getSynonyms(token.termText()); // push onto stack + stack = synonyms.getSynonyms(nextToken.term()); // push onto stack if (stack.length > maxSynonyms) randomize(stack); index = 0; - current = token; + current = (Token) nextToken.clone(); todo = maxSynonyms; - return token; + return nextToken; } /** * Creates and returns a token for the given synonym of the current input - * token; Override for custom (stateless or stateful) behaviour, if desired. + * token; Override for custom (stateless or stateful) behavior, if desired. * * @param synonym * a synonym for the current token's term * @param current * the current token from the underlying child stream + * @param reusableToken + * the token to reuse * @return a new token, or null to indicate that the given synonym should be * ignored */ - protected Token createToken(String synonym, Token current) { - Token token = new Token( - synonym, current.startOffset(), current.endOffset(), SYNONYM_TOKEN_TYPE); - token.setPositionIncrement(0); - return token; + protected Token createToken(String synonym, Token current, final Token reusableToken) { + reusableToken.reinit(current, synonym); + reusableToken.setTermBuffer(synonym); + reusableToken.setType(SYNONYM_TOKEN_TYPE); + reusableToken.setPositionIncrement(0); + return reusableToken; } /** * Randomize synonyms to later sample a subset. Uses constant random seed - * for reproducability. Uses "DRand", a simple, fast, uniform pseudo-random + * for reproducibility. Uses "DRand", a simple, fast, uniform pseudo-random * number generator with medium statistical quality (multiplicative * congruential method), producing integers in the range [Integer.MIN_VALUE, * Integer.MAX_VALUE]. 
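The conversions above and below all follow the same two-sided pattern: TokenStream implementations fill in the Token instance passed to next(Token) instead of allocating a new Token per term, and callers allocate a single reusable Token up front and clone any token they want to keep, because the stream may overwrite that instance on the next call. Two minimal sketches of that pattern follow; the class names, field name, and sample text are illustrative only and do not appear in this commit, while the next(Token), term(), clone(), reinit() and setTermBuffer() calls are the ones the diff uses.

A consumer-side sketch (collecting the tokens an Analyzer produces for a string):

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

// Hypothetical helper, not part of this commit.
public class ReusableTokenConsumer {
  public static List tokenize(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("f", new StringReader(text));
    List tokens = new ArrayList();
    // One Token is allocated up front and handed to the stream on every call.
    final Token reusableToken = new Token();
    for (Token nextToken = stream.next(reusableToken); nextToken != null;
         nextToken = stream.next(reusableToken)) {
      String term = nextToken.term(); // term() replaces the deprecated termText()
      if (term.length() == 0) continue; // nothing to keep for an empty term
      // The token's buffer belongs to the stream, so anything kept past this
      // iteration must be cloned first.
      tokens.add((Token) nextToken.clone());
    }
    stream.close();
    return tokens;
  }

  public static void main(String[] args) throws IOException {
    System.out.println(tokenize(new StandardAnalyzer(), "hi-speed 10 foo"));
  }
}

And a producer-side sketch (a TokenStream that reuses the token it is handed, emitting the blank-separated words of a string):

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;

// Hypothetical stream, not part of this commit.
public class WordStream extends TokenStream {
  private final String[] words;
  private int index = 0;
  private int offset = 0;

  public WordStream(String text) {
    this.words = text.split(" ");
  }

  public Token next(final Token reusableToken) {
    assert reusableToken != null;
    if (index == words.length) return null; // EOS
    String word = words[index++];
    int start = offset;
    offset += word.length() + 1; // words are separated by one blank character
    // reinit() clears the previous state and sets term text and offsets without
    // allocating a new Token.
    return reusableToken.reinit(word, start, start + word.length());
  }
}

The reuse avoids allocating a Token (and the String behind termText()) for every term during analysis; the explicit clone() on the consumer side is only needed where tokens are cached, which is why the vector-building and test code in this commit clones before adding to a list.
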
Modified: lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/PatternAnalyzerTest.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/PatternAnalyzerTest.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/PatternAnalyzerTest.java (original) +++ lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/PatternAnalyzerTest.java Wed Aug 20 07:38:07 2008 @@ -197,9 +197,9 @@ private List getTokens(TokenStream stream) throws IOException { ArrayList tokens = new ArrayList(); - Token token; - while ((token = stream.next()) != null) { - tokens.add(token); + final Token reusableToken = new Token(); + for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) { + tokens.add(nextToken.clone()); } return tokens; } @@ -211,7 +211,7 @@ for (; i < size; i++) { Token t1 = (Token) tokens1.get(i); Token t2 = (Token) tokens2.get(i); - if (!(t1.termText().equals(t2.termText()))) throw new IllegalStateException("termText"); + if (!(t1.term().equals(t2.term()))) throw new IllegalStateException("termText"); if (t1.startOffset() != t2.startOffset()) throw new IllegalStateException("startOffset"); if (t1.endOffset() != t2.endOffset()) throw new IllegalStateException("endOffset"); if (!(t1.type().equals(t2.type()))) throw new IllegalStateException("type"); @@ -222,8 +222,8 @@ catch (IllegalStateException e) { if (size > 0) { System.out.println("i=" + i + ", size=" + size); - System.out.println("t1[size]='" + ((Token) tokens1.get(size-1)).termText() + "'"); - System.out.println("t2[size]='" + ((Token) tokens2.get(size-1)).termText() + "'"); + System.out.println("t1[size]='" + ((Token) tokens1.get(size-1)).term() + "'"); + System.out.println("t2[size]='" + ((Token) tokens2.get(size-1)).term() + "'"); } throw e; } @@ -234,7 +234,7 @@ String str = "["; for (int i=0; i < tokens.size(); i++) { Token t1 = (Token) tokens.get(i); - str = str + "'" + t1.termText() + "', "; + str = str + "'" + t1.term() + "', "; } return str + "]"; } Modified: lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java (original) +++ lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java Wed Aug 20 07:38:07 2008 @@ -23,6 +23,7 @@ import java.util.List; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.search.Query; @@ -105,21 +106,23 @@ // get Analyzer from superclass and tokenize the term TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr)); - org.apache.lucene.analysis.Token t; + final Token reusableToken = new Token(); + Token nextToken; int countTokens = 0; while (true) { try { - t = source.next(); + nextToken = source.next(reusableToken); } catch (IOException e) { - t 
= null; + nextToken = null; } - if (t == null) { + if (nextToken == null) { break; } - if (!"".equals(t.termText())) { + String term = nextToken.term(); + if (!"".equals(term)) { try { - tlist.set(countTokens++, t.termText()); + tlist.set(countTokens++, term); } catch (IndexOutOfBoundsException ioobe) { countTokens = -1; } @@ -189,18 +192,19 @@ // get Analyzer from superclass and tokenize the term TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr)); List tlist = new ArrayList(); - org.apache.lucene.analysis.Token t; + final Token reusableToken = new Token(); + Token nextToken; while (true) { try { - t = source.next(); + nextToken = source.next(reusableToken); } catch (IOException e) { - t = null; + nextToken = null; } - if (t == null) { + if (nextToken == null) { break; } - tlist.add(t.termText()); + tlist.add(nextToken.term()); } try { @@ -238,14 +242,15 @@ throws ParseException { // get Analyzer from superclass and tokenize the term TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr)); - org.apache.lucene.analysis.Token t; + final Token reusableToken = new Token(); + Token nextToken; boolean multipleTokens = false; try { - t = source.next(); - multipleTokens = source.next() != null; + nextToken = source.next(reusableToken); + multipleTokens = source.next(reusableToken) != null; } catch (IOException e) { - t = null; + nextToken = null; } try { @@ -259,7 +264,7 @@ + " - tokens were added"); } - return (t == null) ? null : super.getFuzzyQuery(field, t.termText(), minSimilarity); + return (nextToken == null) ? null : super.getFuzzyQuery(field, nextToken.term(), minSimilarity); } /** @@ -270,18 +275,20 @@ throws ParseException { // get Analyzer from superclass and tokenize the terms TokenStream source = getAnalyzer().tokenStream(field, new StringReader(part1)); - org.apache.lucene.analysis.Token t; + final Token reusableToken = new Token(); + Token nextToken; + Token multipleToken; boolean multipleTokens = false; // part1 try { - t = source.next(); - if (t != null) { - part1 = t.termText(); + nextToken = source.next(reusableToken); + if (nextToken != null) { + part1 = nextToken.term(); } - multipleTokens = source.next() != null; + multipleTokens = source.next(reusableToken) != null; } catch (IOException e) { - t = null; + nextToken = null; } try { source.close(); @@ -293,16 +300,16 @@ + " - tokens were added to part1"); } - source = getAnalyzer().tokenStream(field, new StringReader(part2)); // part2 + source = getAnalyzer().tokenStream(field, new StringReader(part2)); try { - t = source.next(); - if (t != null) { - part2 = t.termText(); + nextToken = source.next(reusableToken); + if (nextToken != null) { + part2 = nextToken.term(); } - multipleTokens = source.next() != null; + multipleTokens = source.next(reusableToken) != null; } catch (IOException e) { - t = null; + nextToken = null; } try { source.close(); Modified: lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/precedence/CharStream.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/precedence/CharStream.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/precedence/CharStream.java (original) +++ lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/precedence/CharStream.java Wed Aug 20 07:38:07 2008 @@ 
-26,6 +26,20 @@ char readChar() throws java.io.IOException; /** + * Returns the column position of the character last read. + * @deprecated + * @see #getEndColumn + */ + int getColumn(); + + /** + * Returns the line number of the character last read. + * @deprecated + * @see #getEndLine + */ + int getLine(); + + /** * Returns the column number of the last character for current token (being * matched after the last call to BeginTOken). */ Modified: lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java (original) +++ lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java Wed Aug 20 07:38:07 2008 @@ -1,14 +1,29 @@ /* Generated By:JavaCC: Do not edit this line. PrecedenceQueryParser.java */ package org.apache.lucene.queryParser.precedence; +import java.io.IOException; +import java.io.StringReader; +import java.text.DateFormat; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.Locale; import java.util.Vector; -import java.io.*; -import java.text.*; -import java.util.*; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.DateTools; import org.apache.lucene.index.Term; -import org.apache.lucene.analysis.*; -import org.apache.lucene.document.*; -import org.apache.lucene.search.*; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.RangeQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.util.Parameter; /** @@ -296,21 +311,22 @@ TokenStream source = analyzer.tokenStream(field, new StringReader(queryText)); Vector v = new Vector(); - org.apache.lucene.analysis.Token t; + final org.apache.lucene.analysis.Token reusableToken = new org.apache.lucene.analysis.Token(); + org.apache.lucene.analysis.Token nextToken; int positionCount = 0; boolean severalTokensAtSamePosition = false; while (true) { try { - t = source.next(); + nextToken = source.next(reusableToken); } catch (IOException e) { - t = null; + nextToken = null; } - if (t == null) + if (nextToken == null) break; - v.addElement(t); - if (t.getPositionIncrement() == 1) + v.addElement(nextToken.clone()); + if (nextToken.getPositionIncrement() == 1) positionCount++; else severalTokensAtSamePosition = true; @@ -325,17 +341,17 @@ if (v.size() == 0) return null; else if (v.size() == 1) { - t = (org.apache.lucene.analysis.Token) v.elementAt(0); - return new TermQuery(new Term(field, t.termText())); + nextToken = (org.apache.lucene.analysis.Token) v.elementAt(0); + return new TermQuery(new Term(field, nextToken.term())); } else { if (severalTokensAtSamePosition) { if (positionCount == 1) { // no phrase query: BooleanQuery q = new BooleanQuery(); for (int i = 
0; i < v.size(); i++) { - t = (org.apache.lucene.analysis.Token) v.elementAt(i); + nextToken = (org.apache.lucene.analysis.Token) v.elementAt(i); TermQuery currentQuery = new TermQuery( - new Term(field, t.termText())); + new Term(field, nextToken.term())); q.add(currentQuery, BooleanClause.Occur.SHOULD); } return q; @@ -345,12 +361,12 @@ MultiPhraseQuery mpq = new MultiPhraseQuery(); List multiTerms = new ArrayList(); for (int i = 0; i < v.size(); i++) { - t = (org.apache.lucene.analysis.Token) v.elementAt(i); - if (t.getPositionIncrement() == 1 && multiTerms.size() > 0) { + nextToken = (org.apache.lucene.analysis.Token) v.elementAt(i); + if (nextToken.getPositionIncrement() == 1 && multiTerms.size() > 0) { mpq.add((Term[])multiTerms.toArray(new Term[0])); multiTerms.clear(); } - multiTerms.add(new Term(field, t.termText())); + multiTerms.add(new Term(field, nextToken.term())); } mpq.add((Term[])multiTerms.toArray(new Term[0])); return mpq; @@ -361,7 +377,7 @@ q.setSlop(phraseSlop); for (int i = 0; i < v.size(); i++) { q.add(new Term(field, ((org.apache.lucene.analysis.Token) - v.elementAt(i)).termText())); + v.elementAt(i)).term())); } return q; Modified: lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj (original) +++ lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj Wed Aug 20 07:38:07 2008 @@ -25,14 +25,29 @@ package org.apache.lucene.queryParser.precedence; +import java.io.IOException; +import java.io.StringReader; +import java.text.DateFormat; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.Locale; import java.util.Vector; -import java.io.*; -import java.text.*; -import java.util.*; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.DateTools; import org.apache.lucene.index.Term; -import org.apache.lucene.analysis.*; -import org.apache.lucene.document.*; -import org.apache.lucene.search.*; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.RangeQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.util.Parameter; /** @@ -320,21 +335,22 @@ TokenStream source = analyzer.tokenStream(field, new StringReader(queryText)); Vector v = new Vector(); - org.apache.lucene.analysis.Token t; + final org.apache.lucene.analysis.Token reusableToken = new org.apache.lucene.analysis.Token(); + org.apache.lucene.analysis.Token nextToken; int positionCount = 0; boolean severalTokensAtSamePosition = false; while (true) { try { - t = source.next(); + nextToken = source.next(reusableToken); } catch (IOException e) { - t = null; + nextToken = null; } - if (t == null) + if (nextToken == null) break; - 
v.addElement(t); - if (t.getPositionIncrement() == 1) + v.addElement(nextToken.clone()); + if (nextToken.getPositionIncrement() == 1) positionCount++; else severalTokensAtSamePosition = true; @@ -349,17 +365,17 @@ if (v.size() == 0) return null; else if (v.size() == 1) { - t = (org.apache.lucene.analysis.Token) v.elementAt(0); - return new TermQuery(new Term(field, t.termText())); + nextToken = (org.apache.lucene.analysis.Token) v.elementAt(0); + return new TermQuery(new Term(field, nextToken.term())); } else { if (severalTokensAtSamePosition) { if (positionCount == 1) { // no phrase query: BooleanQuery q = new BooleanQuery(); for (int i = 0; i < v.size(); i++) { - t = (org.apache.lucene.analysis.Token) v.elementAt(i); + nextToken = (org.apache.lucene.analysis.Token) v.elementAt(i); TermQuery currentQuery = new TermQuery( - new Term(field, t.termText())); + new Term(field, nextToken.term())); q.add(currentQuery, BooleanClause.Occur.SHOULD); } return q; @@ -369,12 +385,12 @@ MultiPhraseQuery mpq = new MultiPhraseQuery(); List multiTerms = new ArrayList(); for (int i = 0; i < v.size(); i++) { - t = (org.apache.lucene.analysis.Token) v.elementAt(i); - if (t.getPositionIncrement() == 1 && multiTerms.size() > 0) { + nextToken = (org.apache.lucene.analysis.Token) v.elementAt(i); + if (nextToken.getPositionIncrement() == 1 && multiTerms.size() > 0) { mpq.add((Term[])multiTerms.toArray(new Term[0])); multiTerms.clear(); } - multiTerms.add(new Term(field, t.termText())); + multiTerms.add(new Term(field, nextToken.term())); } mpq.add((Term[])multiTerms.toArray(new Term[0])); return mpq; @@ -385,7 +401,7 @@ q.setSlop(phraseSlop); for (int i = 0; i < v.size(); i++) { q.add(new Term(field, ((org.apache.lucene.analysis.Token) - v.elementAt(i)).termText())); + v.elementAt(i)).term())); } return q; Modified: lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java (original) +++ lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java Wed Aug 20 07:38:07 2008 @@ -1,13 +1,27 @@ /* Generated By:JavaCC: Do not edit this line. 
PrecedenceQueryParserTokenManager.java */ package org.apache.lucene.queryParser.precedence; +import java.io.IOException; +import java.io.StringReader; +import java.text.DateFormat; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.Locale; import java.util.Vector; -import java.io.*; -import java.text.*; -import java.util.*; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.DateTools; import org.apache.lucene.index.Term; -import org.apache.lucene.analysis.*; -import org.apache.lucene.document.*; -import org.apache.lucene.search.*; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.RangeQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.util.Parameter; public class PrecedenceQueryParserTokenManager implements PrecedenceQueryParserConstants Modified: lucene/java/trunk/contrib/miscellaneous/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/miscellaneous/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/miscellaneous/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java (original) +++ lucene/java/trunk/contrib/miscellaneous/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java Wed Aug 20 07:38:07 2008 @@ -57,19 +57,26 @@ boolean inPhrase = false; int savedStart = 0, savedEnd = 0; - public Token next() throws IOException { + public Token next(final Token reusableToken) throws IOException { + assert reusableToken != null; if (inPhrase) { inPhrase = false; - return new Token("phrase2", savedStart, savedEnd); + reusableToken.setTermBuffer("phrase2"); + reusableToken.setStartOffset(savedStart); + reusableToken.setEndOffset(savedEnd); + return reusableToken; } else - for (Token token = input.next(); token != null; token = input.next()) { - if (token.termText().equals("phrase")) { + for (Token nextToken = input.next(reusableToken); nextToken != null; nextToken = input.next(reusableToken)) { + if (nextToken.term().equals("phrase")) { inPhrase = true; - savedStart = token.startOffset(); - savedEnd = token.endOffset(); - return new Token("phrase1", savedStart, savedEnd); - } else if (!token.termText().equals("stop")) - return token; + savedStart = nextToken.startOffset(); + savedEnd = nextToken.endOffset(); + nextToken.setTermBuffer("phrase1"); + nextToken.setStartOffset(savedStart); + nextToken.setEndOffset(savedEnd); + return nextToken; + } else if (!nextToken.term().equals("stop")) + return nextToken; } return null; } Modified: lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- 
lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java (original) +++ lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java Wed Aug 20 07:38:07 2008 @@ -104,18 +104,19 @@ { if(f.queryString==null) return; TokenStream ts=analyzer.tokenStream(f.fieldName,new StringReader(f.queryString)); - Token token=ts.next(); + final Token reusableToken = new Token(); int corpusNumDocs=reader.numDocs(); Term internSavingTemplateTerm =new Term(f.fieldName); //optimization to avoid constructing new Term() objects HashSet processedTerms=new HashSet(); - while(token!=null) - { - if(!processedTerms.contains(token.termText())) + for (Token nextToken = ts.next(reusableToken); nextToken!=null; nextToken = ts.next(reusableToken)) + { + String term = nextToken.term(); + if(!processedTerms.contains(term)) { - processedTerms.add(token.termText()); + processedTerms.add(term); ScoreTermQueue variantsQ=new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term float minScore=0; - Term startTerm=internSavingTemplateTerm.createTerm(token.termText()); + Term startTerm=internSavingTemplateTerm.createTerm(term); FuzzyTermEnum fe=new FuzzyTermEnum(reader,startTerm,f.minSimilarity,f.prefixLength); TermEnum origEnum = reader.terms(startTerm); int df=0; @@ -162,8 +163,7 @@ q.insert(st); } } - token=ts.next(); - } + } } public Query rewrite(IndexReader reader) throws IOException Modified: lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java (original) +++ lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java Wed Aug 20 07:38:07 2008 @@ -28,6 +28,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.Hits; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; @@ -808,10 +809,11 @@ throws IOException { TokenStream ts = analyzer.tokenStream(fieldName, r); - org.apache.lucene.analysis.Token token; int tokenCount=0; - while ((token = ts.next()) != null) { // for every token - String word = token.termText(); + // for every token + final Token reusableToken = new Token(); + for (Token nextToken = ts.next(reusableToken); nextToken != null; nextToken = ts.next(reusableToken)) { + String word = nextToken.term(); tokenCount++; if(tokenCount>maxNumTokensParsed) { @@ -872,7 +874,7 @@ * For an easier method to call see {@link #retrieveInterestingTerms retrieveInterestingTerms()}. 
* * @param r the reader that has the content of the document - * @return the most intresting words in the document ordered by score, with the highest scoring, or best entry, first + * @return the most interesting words in the document ordered by score, with the highest scoring, or best entry, first * * @see #retrieveInterestingTerms */ Modified: lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/SimilarityQueries.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/SimilarityQueries.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/SimilarityQueries.java (original) +++ lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/SimilarityQueries.java Wed Aug 20 07:38:07 2008 @@ -21,6 +21,7 @@ import java.util.Set; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; @@ -85,12 +86,11 @@ throws IOException { TokenStream ts = a.tokenStream( field, new StringReader( body)); - org.apache.lucene.analysis.Token t; BooleanQuery tmp = new BooleanQuery(); Set already = new HashSet(); // ignore dups - while ( (t = ts.next()) != null) - { - String word = t.termText(); + final Token reusableToken = new Token(); + for (Token nextToken = ts.next(reusableToken); nextToken != null; nextToken = ts.next(reusableToken)) { + String word = nextToken.term(); // ignore opt stop words if ( stop != null && stop.contains( word)) continue; Modified: lucene/java/trunk/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java (original) +++ lucene/java/trunk/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java Wed Aug 20 07:38:07 2008 @@ -18,11 +18,10 @@ */ import java.io.IOException; - import java.lang.reflect.Method; import net.sf.snowball.SnowballProgram; -import net.sf.snowball.ext.*; +import net.sf.snowball.ext.EnglishStemmer; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenFilter; @@ -60,20 +59,22 @@ } /** Returns the next input Token, after being stemmed */ - public final Token next() throws IOException { - Token token = input.next(); - if (token == null) + public final Token next(final Token reusableToken) throws IOException { + assert reusableToken != null; + Token nextToken = input.next(reusableToken); + if (nextToken == null) return null; - stemmer.setCurrent(token.termText()); + String originalTerm = nextToken.term(); + stemmer.setCurrent(originalTerm); try { stemMethod.invoke(stemmer, EMPTY_ARGS); } catch (Exception e) { throw new RuntimeException(e.toString()); } - - Token newToken = new Token(stemmer.getCurrent(), - token.startOffset(), token.endOffset(), token.type()); - newToken.setPositionIncrement(token.getPositionIncrement()); - return newToken; + String finalTerm = stemmer.getCurrent(); + // Don't bother updating, if it is unchanged. 
+ if (!originalTerm.equals(finalTerm)) + nextToken.setTermBuffer(finalTerm); + return nextToken; } } Modified: lucene/java/trunk/contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java (original) +++ lucene/java/trunk/contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java Wed Aug 20 07:38:07 2008 @@ -1,64 +1,30 @@ package org.apache.lucene.analysis.snowball; -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2004 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ -import java.io.*; +import java.io.StringReader; -import junit.framework.*; +import junit.framework.TestCase; -import org.apache.lucene.analysis.*; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.index.Payload; +import org.apache.lucene.analysis.TokenStream; public class TestSnowball extends TestCase { @@ -66,12 +32,12 @@ String input, String[] output) throws Exception { TokenStream ts = a.tokenStream("dummy", new StringReader(input)); + final Token reusableToken = new Token(); for (int i = 0; i < output.length; i++) { - Token t = ts.next(); - assertNotNull(t); - assertEquals(output[i], t.termText()); + Token nextToken = ts.next(reusableToken); + assertEquals(output[i], nextToken.term()); } - assertNull(ts.next()); + assertNull(ts.next(reusableToken)); ts.close(); } @@ -83,25 +49,33 @@ public void testFilterTokens() throws Exception { - final Token tok = new Token("accents", 2, 7, "wrd"); + final Token tok = new Token(2, 7, "wrd"); + tok.setTermBuffer("accents"); tok.setPositionIncrement(3); + Payload tokPayload = new Payload(new byte[]{0,1,2,3}); + tok.setPayload(tokPayload); + int tokFlags = 77; + tok.setFlags(tokFlags); SnowballFilter filter = new SnowballFilter( new TokenStream() { - public Token next() { + public Token next(final Token reusableToken) { + assert reusableToken != null; return tok; } }, "English" ); - Token newtok = filter.next(); + final Token reusableToken = new Token(); + Token nextToken = filter.next(reusableToken); - assertEquals("accent", newtok.termText()); - assertEquals(2, newtok.startOffset()); - assertEquals(7, newtok.endOffset()); - assertEquals("wrd", newtok.type()); - assertEquals(3, newtok.getPositionIncrement()); + assertEquals("accent", nextToken.term()); + assertEquals(2, nextToken.startOffset()); + assertEquals(7, nextToken.endOffset()); + assertEquals("wrd", nextToken.type()); + assertEquals(3, nextToken.getPositionIncrement()); + assertEquals(tokFlags, nextToken.getFlags()); + assertEquals(tokPayload, nextToken.getPayload()); } -} - +} \ No newline at end of file Modified: lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java?rev=687357&r1=687356&r2=687357&view=diff ============================================================================== --- lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java (original) +++ lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java Wed Aug 20 07:38:07 2008 @@ -133,7 +133,8 @@ * * @see org.apache.lucene.analysis.TokenStream#next() */ - public Token next(Token result) throws 
IOException { + public Token next(final Token reusableToken) throws IOException { + assert reusableToken != null; if (tokens != null && tokens.hasNext()){ return (Token)tokens.next(); } @@ -144,22 +145,22 @@ } String type = WikipediaTokenizerImpl.TOKEN_TYPES[tokenType]; if (tokenOutput == TOKENS_ONLY || untokenizedTypes.contains(type) == false){ - setupToken(result); + setupToken(reusableToken); } else if (tokenOutput == UNTOKENIZED_ONLY && untokenizedTypes.contains(type) == true){ - collapseTokens(result, tokenType); + collapseTokens(reusableToken, tokenType); } else if (tokenOutput == BOTH){ //collapse into a single token, add it to tokens AND output the individual tokens //output the untokenized Token first - collapseAndSaveTokens(result, tokenType, type); + collapseAndSaveTokens(reusableToken, tokenType, type); } - result.setPositionIncrement(scanner.getPositionIncrement()); - result.setType(type); - return result; + reusableToken.setPositionIncrement(scanner.getPositionIncrement()); + reusableToken.setType(type); + return reusableToken; } - private void collapseAndSaveTokens(Token result, int tokenType, String type) throws IOException { + private void collapseAndSaveTokens(final Token reusableToken, int tokenType, String type) throws IOException { //collapse StringBuffer buffer = new StringBuffer(32); int numAdded = scanner.setText(buffer); @@ -188,10 +189,10 @@ } //trim the buffer String s = buffer.toString().trim(); - result.setTermBuffer(s.toCharArray(), 0, s.length()); - result.setStartOffset(theStart); - result.setEndOffset(theStart + s.length()); - result.setFlags(UNTOKENIZED_TOKEN_FLAG); + reusableToken.setTermBuffer(s.toCharArray(), 0, s.length()); + reusableToken.setStartOffset(theStart); + reusableToken.setEndOffset(theStart + s.length()); + reusableToken.setFlags(UNTOKENIZED_TOKEN_FLAG); //The way the loop is written, we will have proceeded to the next token. We need to pushback the scanner to lastPos if (tmpTokType != WikipediaTokenizerImpl.YYEOF){ scanner.yypushback(scanner.yylength()); @@ -205,7 +206,7 @@ saved.setType(type); } - private void collapseTokens(Token result, int tokenType) throws IOException { + private void collapseTokens(final Token reusableToken, int tokenType) throws IOException { //collapse StringBuffer buffer = new StringBuffer(32); int numAdded = scanner.setText(buffer); @@ -227,10 +228,10 @@ } //trim the buffer String s = buffer.toString().trim(); - result.setTermBuffer(s.toCharArray(), 0, s.length()); - result.setStartOffset(theStart); - result.setEndOffset(theStart + s.length()); - result.setFlags(UNTOKENIZED_TOKEN_FLAG); + reusableToken.setTermBuffer(s.toCharArray(), 0, s.length()); + reusableToken.setStartOffset(theStart); + reusableToken.setEndOffset(theStart + s.length()); + reusableToken.setFlags(UNTOKENIZED_TOKEN_FLAG); //The way the loop is written, we will have proceeded to the next token. We need to pushback the scanner to lastPos if (tmpTokType != WikipediaTokenizerImpl.YYEOF){ scanner.yypushback(scanner.yylength()); @@ -239,11 +240,11 @@ } } - private void setupToken(Token result) { - scanner.getText(result); + private void setupToken(final Token reusableToken) { + scanner.getText(reusableToken); final int start = scanner.yychar(); - result.setStartOffset(start); - result.setEndOffset(start + result.termLength()); + reusableToken.setStartOffset(start); + reusableToken.setEndOffset(start + reusableToken.termLength()); } /*