From: whoschek@apache.org
To: java-commits@lucene.apache.org
Reply-To: java-dev@lucene.apache.org
Subject: svn commit: r479749 - /lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java
Date: Mon, 27 Nov 2006 20:25:33 -0000
Message-Id: <20061127202533.718A41A9846@eris.apache.org>

Author: whoschek
Date: Mon Nov 27 12:25:32 2006
New Revision: 479749

URL: http://svn.apache.org/viewvc?view=rev&rev=479749
Log:
Reverting back because of: "Actually, now I'm considering reverting back to the version without a public clear() method. The rationale is that this would be less complex and more consistent with the AnalyzerUtil design (simple methods generating simple anonymous analyzer wrappers). If desired, you can still (re)use a single static "child" analyzer instance. It's cheap and easy to create a new caching analyzer on top of the static analyzer, and to do so before each document. The old one will simply be gc'd."
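[Editorial illustration, not part of the commit: a minimal sketch of the usage pattern the log message describes, assuming the getTokenCachingAnalyzer factory introduced by the diff below. The class and method names (PerDocumentCachingExample, addToAllIndexes) are hypothetical.]

    import java.io.IOException;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.memory.AnalyzerUtil;

    public class PerDocumentCachingExample {

      // a single static "child" analyzer, reused across all documents
      private static final Analyzer CHILD = new StandardAnalyzer();

      // adds one document to several indexes; the expensive text analysis
      // runs once per field, and the remaining writers replay cached tokens
      static void addToAllIndexes(Document doc, IndexWriter[] writers)
          throws IOException {
        // create a fresh caching wrapper before each new document;
        // the previous wrapper simply becomes garbage, so no clear() is needed
        Analyzer cachingAnalyzer = AnalyzerUtil.getTokenCachingAnalyzer(CHILD);
        for (int i = 0; i < writers.length; i++) {
          writers[i].addDocument(doc, cachingAnalyzer);
        }
      }
    }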
Modified:
    lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java

Modified: lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java?view=diff&rev=479749&r1=479748&r2=479749
==============================================================================
--- lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java (original)
+++ lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java Mon Nov 27 12:25:32 2006
@@ -203,10 +203,10 @@
 
   /**
-   * Analyzer wrapper that caches all tokens generated by the underlying child analyzer's
+   * Returns an analyzer wrapper that caches all tokens generated by the underlying child analyzer's
    * token streams, and delivers those cached tokens on subsequent calls to
-   * <code>tokenStream(String fieldName, Reader reader)</code>,
-   * if the fieldName has been seen before, altogether ignoring the Reader parameter.
+   * <code>tokenStream(String fieldName, Reader reader)</code>
+   * if the fieldName has been seen before, altogether ignoring the Reader parameter on cache lookup.
    * <p>
    * If Analyzer / TokenFilter chains are expensive in terms of I/O or CPU, such caching can
    * help improve performance if the same document is added to multiple Lucene indexes,
    * because the text analysis phase need not be performed more than once.
@@ -216,61 +216,49 @@
    *
    * <ul>
    * <li>Caching the tokens of large Lucene documents can lead to out of memory exceptions.</li>
    * 
    * <li>The Token instances delivered by the underlying child analyzer must be immutable.</li>
    * 
-   * <li>A caching analyzer instance must not be used for more than one document, unless
-   * <code>clear()</code> is called before each new document.</li>
+   * <li>A caching analyzer instance must not be used for more than one document
+   * because the cache is not keyed on the Reader parameter.</li>
    * 
    * </ul>
+   * 
+   * @param child
+   *            the underlying child analyzer
+   * @return a new analyzer
    */
-  public static class TokenCachingAnalyzer extends Analyzer {
-    
-    private final Analyzer child;
-    private final HashMap cache = new HashMap();
-    
-    /**
-     * Creates and returns a new caching analyzer that wraps the given underlying child analyzer.
-     * 
-     * @param child
-     *            the underlying child analyzer
-     * @return a new caching analyzer
-     */
-    public TokenCachingAnalyzer(Analyzer child) {
-      if (child == null)
-        throw new IllegalArgumentException("child analyzer must not be null");
-      
-      this.child = child;
-    }
-    
-    /**
-     * Removes all cached data.
-     */
-    public void clear() {
-      cache.clear();
-    }
-    
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      final ArrayList tokens = (ArrayList) cache.get(fieldName);
-      if (tokens == null) { // not yet cached
-        final ArrayList tokens2 = new ArrayList();
-        cache.put(fieldName, tokens2);
-        return new TokenFilter(child.tokenStream(fieldName, reader)) {
-          
-          public Token next() throws IOException {
-            Token token = input.next(); // from filter super class
-            if (token != null) tokens2.add(token);
-            return token;
-          }
-        };
-      } else { // already cached
-        return new TokenStream() {
-          
-          private Iterator iter = tokens.iterator();
-          
-          public Token next() {
-            if (!iter.hasNext()) return null;
-            return (Token) iter.next();
-          }
-        };
+  public static Analyzer getTokenCachingAnalyzer(final Analyzer child) {
+    
+    if (child == null)
+      throw new IllegalArgumentException("child analyzer must not be null");
+    
+    return new Analyzer() {
+      
+      private final HashMap cache = new HashMap();
+      
+      public TokenStream tokenStream(String fieldName, Reader reader) {
+        final ArrayList tokens = (ArrayList) cache.get(fieldName);
+        if (tokens == null) { // not yet cached
+          final ArrayList tokens2 = new ArrayList();
+          cache.put(fieldName, tokens2);
+          return new TokenFilter(child.tokenStream(fieldName, reader)) {
+            
+            public Token next() throws IOException {
+              Token token = input.next(); // from filter super class
+              if (token != null) tokens2.add(token);
+              return token;
+            }
+          };
+        } else { // already cached
+          return new TokenStream() {
+            
+            private Iterator iter = tokens.iterator();
+            
+            public Token next() {
+              if (!iter.hasNext()) return null;
+              return (Token) iter.next();
+            }
+          };
+        }
       }
-    }
+    };
   }
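
[Editorial illustration, not part of the commit: a minimal sketch of the caveat that the cache is keyed on the field name alone, so the Reader passed on a repeat call for the same field is ignored. The class name CacheLookupDemo is hypothetical; the calls used (Analyzer.tokenStream, TokenStream.next, Token.termText) match the Lucene API as of this revision.]

    import java.io.StringReader;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.Token;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.memory.AnalyzerUtil;

    public class CacheLookupDemo {

      public static void main(String[] args) throws Exception {
        Analyzer cached =
            AnalyzerUtil.getTokenCachingAnalyzer(new StandardAnalyzer());

        // first call: tokens come from the child analyzer and are recorded
        // as the stream is consumed
        TokenStream first =
            cached.tokenStream("title", new StringReader("hello world"));
        for (Token t = first.next(); t != null; t = first.next()) {
          System.out.println("first:  " + t.termText()); // "hello", "world"
        }

        // second call for the same field name: the Reader is ignored on
        // cache lookup, so the cached tokens are replayed unchanged
        TokenStream second =
            cached.tokenStream("title", new StringReader("ignored text"));
        for (Token t = second.next(); t != null; t = second.next()) {
          System.out.println("second: " + t.termText()); // "hello", "world" again
        }
      }
    }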