lucene-java-commits mailing list archives

From: whosc...@apache.org
Subject: svn commit: r479699 - /lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java
Date: Mon, 27 Nov 2006 17:37:27 GMT
Author: whoschek
Date: Mon Nov 27 09:37:26 2006
New Revision: 479699

URL: http://svn.apache.org/viewvc?view=rev&rev=479699
Log:
added clear() method to TokenCachingAnalyzer, changed anonymous class to public class

Modified:
    lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java

Modified: lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java?view=diff&rev=479699&r1=479698&r2=479699
==============================================================================
--- lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java
(original)
+++ lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java
Mon Nov 27 09:37:26 2006
@@ -203,57 +203,74 @@
 
   
   /**
-   * Returns an analyzer wrapper that caches all tokens generated by the underlying child analyzer's
-   * token stream, and delivers those cached tokens on subsequent matching calls to 
-   * <code>tokenStream(String fieldName, Reader reader)</code>.
+   * Analyzer wrapper that caches all tokens generated by the underlying child analyzer's
+   * token streams, and delivers those cached tokens on subsequent calls to 
+   * <code>tokenStream(String fieldName, Reader reader)</code>, 
+   * if the fieldName has been seen before, altogether ignoring the Reader parameter.
    * <p>
   * If Analyzer / TokenFilter chains are expensive in terms of I/O or CPU, such caching can 
    * help improve performance if the same document is added to multiple Lucene indexes, 
    * because the text analysis phase need not be performed more than once.
    * <p>
    * Caveats: 
-   * 1) Caching the tokens of large Lucene documents can lead to out of memory exceptions.
-   * 2) The Token instances delivered by the underlying child analyzer must be immutable.
-   * 
-   * @param child
-   *            the underlying child analyzer
-   * @return a new analyzer
+   * <ul>
+   * <li>Caching the tokens of large Lucene documents can lead to out of memory exceptions.</li>
+   * <li>The Token instances delivered by the underlying child analyzer must be immutable.</li>
+   * <li>A caching analyzer instance must not be used for more than one document, unless 
+   * <code>clear()</code> is called before each new document.</li>
+   * </ul>
    */
-  public static Analyzer getTokenCachingAnalyzer(final Analyzer child) {
-
-    if (child == null)
-      throw new IllegalArgumentException("child analyzer must not be null");
-
-    return new Analyzer() {
-
-      private final HashMap cache = new HashMap();
-
-      public TokenStream tokenStream(String fieldName, Reader reader) {
-        final ArrayList tokens = (ArrayList) cache.get(fieldName);
-        if (tokens == null) { // not yet cached
-          final ArrayList tokens2 = new ArrayList();
-          cache.put(fieldName, tokens2);
-          return new TokenFilter(child.tokenStream(fieldName, reader)) {
-
-            public Token next() throws IOException {
-              Token token = input.next(); // from filter super class
-              if (token != null) tokens2.add(token);
-              return token;
-            }
-          };
-        } else { // already cached
-          return new TokenStream() {
-
-            private Iterator iter = tokens.iterator();
-
-            public Token next() {
-              if (!iter.hasNext()) return null;
-              return (Token) iter.next();
-            }
-          };
-        }
+  public static class TokenCachingAnalyzer extends Analyzer {
+    
+    private final Analyzer child;
+    private final HashMap cache = new HashMap();
+      
+    /**
+     * Creates and returns a new caching analyzer that wraps the given underlying child analyzer.
+     * 
+     * @param child
+     *            the underlying child analyzer
+     * @return a new caching analyzer
+     */
+    public TokenCachingAnalyzer(Analyzer child) {
+      if (child == null)
+        throw new IllegalArgumentException("child analyzer must not be null");
+
+      this.child = child;
+    }
+    
+    /**
+     * Removes all cached data.
+     */
+    public void clear() {
+      cache.clear();
+    }
+
+    public TokenStream tokenStream(String fieldName, Reader reader) {
+      final ArrayList tokens = (ArrayList) cache.get(fieldName);
+      if (tokens == null) { // not yet cached
+        final ArrayList tokens2 = new ArrayList();
+        cache.put(fieldName, tokens2);
+        return new TokenFilter(child.tokenStream(fieldName, reader)) {
+
+          public Token next() throws IOException {
+            Token token = input.next(); // from filter super class
+            if (token != null) tokens2.add(token);
+            return token;
+          }
+        };
+      } else { // already cached
+        return new TokenStream() {
+
+          private Iterator iter = tokens.iterator();
+
+          public Token next() {
+            if (!iter.hasNext()) return null;
+            return (Token) iter.next();
+          }
+        };
       }
-    };
+    }
   }
       
   



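A minimal usage sketch of the new public TokenCachingAnalyzer, assuming the Lucene 2.x API of the period (IndexWriter, RAMDirectory, StandardAnalyzer); the field name and sample text below are illustrative only, not part of the commit:

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.memory.AnalyzerUtil;
import org.apache.lucene.store.RAMDirectory;

public class TokenCachingExample {

  public static void main(String[] args) throws IOException {
    // Wrap an expensive child analyzer; tokens are cached per field name.
    AnalyzerUtil.TokenCachingAnalyzer analyzer =
        new AnalyzerUtil.TokenCachingAnalyzer(new StandardAnalyzer());

    IndexWriter writerA = new IndexWriter(new RAMDirectory(), analyzer, true);
    IndexWriter writerB = new IndexWriter(new RAMDirectory(), analyzer, true);

    Document doc = new Document();
    doc.add(new Field("content", "the quick brown fox", Field.Store.YES,
        Field.Index.TOKENIZED));

    // The first add runs the child analyzer and fills the cache ...
    writerA.addDocument(doc);
    // ... the second add of the same document replays the cached tokens.
    writerB.addDocument(doc);

    // The cache is keyed by field name only and ignores the Reader, so
    // clear() must be called before analyzing a different document with
    // the same analyzer instance.
    analyzer.clear();

    writerA.close();
    writerB.close();
  }
}

The clear() method added in this commit exists because the cache is keyed by field name alone; without clearing it between documents, a second document analyzed through the same instance would silently receive the first document's tokens.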