lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r807574 - in /lucene/java/trunk/src/java/org/apache/lucene/analysis: BaseCharFilter.java CharReader.java CharStream.java MappingCharFilter.java NormalizeCharMap.java
Date Tue, 25 Aug 2009 12:02:18 GMT
Author: mikemccand
Date: Tue Aug 25 12:02:17 2009
New Revision: 807574

URL: http://svn.apache.org/viewvc?rev=807574&view=rev
Log:
improvements to CharStream/Reader/Filter & related classes' javadocs

Modified:
    lucene/java/trunk/src/java/org/apache/lucene/analysis/BaseCharFilter.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/CharReader.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/CharStream.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/MappingCharFilter.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/NormalizeCharMap.java

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/BaseCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/BaseCharFilter.java?rev=807574&r1=807573&r2=807574&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/BaseCharFilter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/BaseCharFilter.java Tue Aug 25 12:02:17
2009
@@ -21,15 +21,15 @@
 import java.util.List;
 
 /**
- * Base utility class for implementing a {@link
- * CharFilter}.  You record mappings by calling {@link
- * #addOffCorrectMap}, and then invoke the correct method.
- * This class is not particularly efficient, eg a new class
- * instance is created for every call to {@link
- * #addOffCorrectMap}, which is appended to a private list.
- * When retrieving a mapping, that list is linearly
- * checked.
- * @version $Id$
+ * Base utility class for implementing a {@link CharFilter}.
+ * You subclass this, and then record mappings by calling
+ * {@link #addOffCorrectMap}, and then invoke the correct
+ * method to correct an offset.
+ *
+ * <p><b>NOTE</b>: This class is not particularly efficient.
+ * For example, a new class instance is created for every
+ * call to {@link #addOffCorrectMap}, which is then appended
+ * to a private list.
  */
 public abstract class BaseCharFilter extends CharFilter {
 
@@ -41,8 +41,10 @@
   }
 
   /** Retrieve the corrected offset.  Note that this method
-   *  is slow if you correct positions far before the most
-   *  recently added position. */
+   *  is slow if you correct positions far before the most
+   *  recently added position, as it's a simple linear
+   *  search backwards through all offset corrections added
+   *  by {@link #addOffCorrectMap}. */
   protected int correct(int currentOff) {
     if (pcmList == null || pcmList.isEmpty()) {
       return currentOff;

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/CharReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/CharReader.java?rev=807574&r1=807573&r2=807574&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/CharReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/CharReader.java Tue Aug 25 12:02:17
2009
@@ -21,10 +21,10 @@
 import java.io.Reader;
 
 /**
- * CharReader is a Reader wrapper. It reads chars from Reader and outputs CharStream.
- *
- * @version $Id$
- *
+ * CharReader is a Reader wrapper. It reads chars from
+ * Reader and outputs {@link CharStream}, defining an
+ * identity function {@link #correctOffset} method that
+ * simply returns the provided offset.
  */
 public final class CharReader extends CharStream {
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/CharStream.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/CharStream.java?rev=807574&r1=807573&r2=807574&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/CharStream.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/CharStream.java Tue Aug 25 12:02:17
2009
@@ -20,17 +20,23 @@
 import java.io.Reader;
 
 /**
- * CharStream adds <a href="#correctOffset(int)">correctOffset</a> functionality over Reader.
- *
- * @version $Id$
+ * CharStream adds <a
+ * href="#correctOffset(int)">correctOffset</a>
+ * functionality over Reader.  All Tokenizers accept a
+ * CharStream as input, which enables arbitrary character
+ * based filtering before tokenization.  The {@link
+ * #correctOffset} method fixes offsets to account for
+ * removal or insertion of characters, so that the offsets
+ * reported in the tokens match the character offsets of the
+ * original Reader.
  */
 public abstract class CharStream extends Reader {
 
   /**
    * Called by CharFilter(s) and Tokenizer to correct token offset.
    *
-   * @param currentOff current offset
-   * @return corrected token offset
+   * @param currentOff offset as seen in the output
+   * @return corrected offset based on the input
    */
   public abstract int correctOffset(int currentOff);
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/MappingCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/MappingCharFilter.java?rev=807574&r1=807573&r2=807574&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/MappingCharFilter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/MappingCharFilter.java Tue Aug 25
12:02:17 2009
@@ -21,10 +21,10 @@
 import java.util.LinkedList;
 
 /**
- * {@link CharFilter} that applies the mappings contained in
- * a {@link NormalizeCharMap} to the character stream.
- *
- * @version $Id$
+ * Simplistic {@link CharFilter} that applies the mappings
+ * contained in a {@link NormalizeCharMap} to the character
+ * stream, correcting the resulting changes to the
+ * offsets.
  */
 public class MappingCharFilter extends BaseCharFilter {
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/NormalizeCharMap.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/NormalizeCharMap.java?rev=807574&r1=807573&r2=807574&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/NormalizeCharMap.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/NormalizeCharMap.java Tue Aug 25
12:02:17 2009
@@ -23,7 +23,6 @@
 /**
  * Holds a map of String input to String output, to be used
  * with {@link MappingCharFilter}.
- * @version $Id$
  */
 public class NormalizeCharMap {
 
@@ -32,6 +31,14 @@
   String normStr;
   int diff;
 
+  /** Records a replacement to be applied to the input
+   *  stream.  Whenever <code>singleMatch</code> occurs in
+   *  the input, it will be replaced with
+   *  <code>replacement</code>.
+   *
+   * @param singleMatch input String to be replaced
+   * @param replacement output String
+   */
   public void add(String singleMatch, String replacement) {
     NormalizeCharMap currMap = this;
     for(int i = 0; i < singleMatch.length(); i++) {



Mime
View raw message