lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From whosc...@apache.org
Subject svn commit: r478360 - /lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java
Date Wed, 22 Nov 2006 22:55:02 GMT
Author: whoschek
Date: Wed Nov 22 14:55:01 2006
New Revision: 478360

URL: http://svn.apache.org/viewvc?view=rev&rev=478360
Log:
added getTokenCachingAnalyzer()

Modified:
    lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java

Modified: lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java?view=diff&rev=478360&r1=478359&r2=478360
==============================================================================
--- lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java (original)
+++ lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java Wed Nov 22 14:55:01 2006
@@ -21,9 +21,11 @@
 import java.io.PrintStream;
 import java.io.Reader;
 import java.io.StringReader;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.Map;
 import java.util.regex.Pattern;
 
@@ -201,6 +203,60 @@
 
   
   /**
+   * Returns an analyzer wrapper that caches all tokens generated by the underlying child analyzer's
+   * token stream, and delivers those cached tokens on subsequent calls to
+   * <code>tokenStream(String fieldName, Reader reader)</code>.
+   * <p>
+   * This can help improve performance in the presence of expensive Analyzer / TokenFilter chains.
+   * <p>
+   * Tokens are cached per (fieldName, reader) pair. A token list is entered into the cache only
+   * once the child token stream has been read to its end (that is, once its <code>next()</code>
+   * has returned <code>null</code>), so a stream that is abandoned part way through never causes
+   * a truncated token list to be replayed by a later call.
+   * <p>
+   * Caveats: Caching only works if the <code>equals()</code> and <code>hashCode()</code> methods
+   * are properly implemented on the Reader passed to
+   * <code>tokenStream(String fieldName, Reader reader)</code>.
+   * Further, cache entries are never removed, so using caching on large Lucene documents can
+   * lead to out of memory exceptions. The cache is a plain, unsynchronized <code>HashMap</code>.
+   *
+   * @param child
+   *            the underlying child analyzer
+   * @return a new analyzer
+   * @throws IllegalArgumentException
+   *             if <code>child</code> is <code>null</code>
+   */
+  public static Analyzer getTokenCachingAnalyzer(final Analyzer child) {
+
+    if (child == null)
+      throw new IllegalArgumentException("child analyzer must not be null");
+
+    return new Analyzer() {
+
+      // maps Pair(fieldName, reader) --> ArrayList of Token (complete token lists only)
+      private final HashMap cache = new HashMap();
+
+      public TokenStream tokenStream(String fieldName, Reader reader) {
+        final Pair key = new Pair(fieldName, reader);
+        final ArrayList cached = (ArrayList) cache.get(key);
+        if (cached != null) { // already cached: replay the stored tokens
+          return new TokenStream() {
+
+            private final Iterator iter = cached.iterator();
+
+            public Token next() {
+              if (!iter.hasNext()) return null;
+              return (Token) iter.next();
+            }
+          };
+        }
+
+        // not yet cached: record the child's tokens while passing them through
+        final ArrayList recorded = new ArrayList();
+        return new TokenFilter(child.tokenStream(fieldName, reader)) {
+
+          public Token next() throws IOException {
+            Token token = input.next(); // from filter super class
+            if (token != null) {
+              recorded.add(token);
+            } else {
+              // End of the child stream: only now is the list known to be complete.
+              // Publishing it earlier would let a concurrent or later call replay a
+              // partial list (or iterate a list that is still being appended to).
+              cache.put(key, recorded);
+            }
+            return token;
+          }
+        };
+      }
+    };
+  }
+      
+  
+  /**
    * Returns (frequency:term) pairs for the top N distinct terms (aka words),
    * sorted descending by frequency (and ascending by term, if tied).
    * <p>
@@ -381,4 +437,109 @@
     }   
   }
   
+  
+  ///////////////////////////////////////////////////////////////////////////////
+  // Nested classes:
+  ///////////////////////////////////////////////////////////////////////////////
+  /**
+   * A convenience class holding two elements, namely <code>first</code> and
<code>second</code>,
+   * either or both of which may be <code>null</code>.
+   */
+  private static final class Pair implements java.io.Serializable {
+    
+    protected Object first;
+    protected Object second;
+
+    private Pair() {}
+
+    /** Constructs a pair with the given two elements, either or both of which may be <code>null</code>.
+     * 
+     * @param first the first element of the pair.
+     * @param second the second element of the pair.
+     */
+    public Pair(Object first, Object second) {
+      this.first = first;
+      this.second = second;
+    }
+
+    /** Returns the first element of the pair.
+     * 
+     *  @return The first element of the pair.
+     */
+    public Object first() {
+      return this.first;
+    }
+
+    /** Returns the second element of the pair.
+     * 
+     *  @return The second element of the pair.
+     */
+    public Object second() {
+      return this.second;
+    }
+
+    public String toString() {
+      return "Pair (first=" + String.valueOf(first) + ", second=" + String.valueOf(second)
+ ")";
+    }
+
+    public int hashCode() {
+      return hashCode(this.first, this.second);
+    }
+
+    public boolean equals(Object other) {
+      if (!(other instanceof Pair)) return false;
+      return equals(this.first, ((Pair) other).first, this.second, ((Pair) other).second);
+    }
+
+    /** Compares two 'pairs' <code>x</code> and <code>y</code> for
equality.
+     * 
+     * In other words determines <code>xA.equals(yA)</code> and <code>xB.equals(yB)</code>,

+     * taking care of <code>null</code> values.
+     * This is a static method that avoids the inefficiency of temporary {@link Pair} objects.
+     * 
+     * @return <code>true</code> if the pair <code>x</code> and the
pair <code>y</code> are equal; <code>false</code> otherwise.
+     */
+    public static boolean equals(Object xA, Object yA, Object xB, Object yB) {
+      // compare A
+      if (xA != yA) {
+        if (xA == null && yA != null)
+          return false;
+        if (xA != null && yA == null)
+          return false;
+        if (!xA.equals(yA))
+          return false;
+      }
+
+      // compare B
+      if (xB != yB) {
+        if (xB == null && yB != null)
+          return false;
+        if (xB != null && yB == null)
+          return false;
+        if (!xB.equals(yB))
+          return false;
+      }
+
+      return true;
+    }
+
+    /** Returns the hashcode of the two elements of a 'pair'.
+     * 
+     * This is a static method that avoids the inefficiency of temporary {@link Pair} objects.
+     * 
+     * @return the hash code.
+     */
+    public static int hashCode(Object x, Object y) {
+      if (x == null && y == null)
+        return 0;
+      else if (x == null)
+        return y.hashCode();
+      else if (y == null)
+        return x.hashCode();
+      else
+        return x.hashCode() ^ y.hashCode();
+    }
+
+  }
+
 }



Mime
View raw message