lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From uschind...@apache.org
Subject svn commit: r826404 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/analysis/ src/test/org/apache/lucene/analysis/
Date Sun, 18 Oct 2009 10:37:56 GMT
Author: uschindler
Date: Sun Oct 18 10:37:56 2009
New Revision: 826404

URL: http://svn.apache.org/viewvc?rev=826404&view=rev
Log:
LUCENE-1987: Remove lots of deprecations from analysis package. Undeprecate some Token ctors.

Removed:
    lucene/java/trunk/src/java/org/apache/lucene/analysis/CharacterCache.java
Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/common-build.xml
    lucene/java/trunk/src/java/org/apache/lucene/analysis/MappingCharFilter.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/NormalizeCharMap.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestToken.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=826404&r1=826403&r2=826404&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Sun Oct 18 10:37:56 2009
@@ -75,6 +75,12 @@
 * LUCENE-1979: Remove remaining deprecations from indexer package.
   (Uwe Schindler, Michael Busch)
 
+* LUCENE-1989: Generify CharArraySet. (Uwe Schindler)
+
+* LUCENE-1987: Remove deprecations from analysis package and Token.
+  Un-deprecate some ctors of Token, as they are still useful.
+  (Uwe Schindler)
+
 Bug fixes
 
 * LUCENE-1951: When the text provided to WildcardQuery has no wildcard

Modified: lucene/java/trunk/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/java/trunk/common-build.xml?rev=826404&r1=826403&r2=826404&view=diff
==============================================================================
--- lucene/java/trunk/common-build.xml (original)
+++ lucene/java/trunk/common-build.xml Sun Oct 18 10:37:56 2009
@@ -42,7 +42,7 @@
   <property name="Name" value="Lucene"/>
   <property name="dev.version" value="3.0-dev"/>
   <property name="version" value="${dev.version}"/>
-  <property name="compatibility.tag" value="lucene_2_9_back_compat_tests_20091014"/>
+  <property name="compatibility.tag" value="lucene_2_9_back_compat_tests_20091018"/>
   <property name="spec.version" value="${version}"/>	
   <property name="year" value="2000-${current.year}"/>
   <property name="final.name" value="lucene-${name}-${version}"/>

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/MappingCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/MappingCharFilter.java?rev=826404&r1=826403&r2=826404&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/MappingCharFilter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/MappingCharFilter.java Sun Oct 18 10:37:56 2009
@@ -30,8 +30,7 @@
 public class MappingCharFilter extends BaseCharFilter {
 
   private final NormalizeCharMap normMap;
-  //private LinkedList<Character> buffer;
-  private LinkedList buffer;
+  private LinkedList<Character> buffer;
   private String replacement;
   private int charPointer;
   private int nextCharCounter;
@@ -57,7 +56,7 @@
       int firstChar = nextChar();
       if (firstChar == -1) return -1;
       NormalizeCharMap nm = normMap.submap != null ?
-        (NormalizeCharMap)normMap.submap.get(CharacterCache.valueOf((char) firstChar)) : null;
+        normMap.submap.get(Character.valueOf((char) firstChar)) : null;
       if (nm == null) return firstChar;
       NormalizeCharMap result = match(nm);
       if (result == null) return firstChar;
@@ -78,7 +77,7 @@
   private int nextChar() throws IOException {
     nextCharCounter++;
     if (buffer != null && !buffer.isEmpty()) {
-      return ((Character)buffer.removeFirst()).charValue();
+      return buffer.removeFirst().charValue();
     }
     return input.read();
   }
@@ -86,15 +85,15 @@
   private void pushChar(int c) {
     nextCharCounter--;
     if(buffer == null)
-      buffer = new LinkedList();
-    buffer.addFirst(new Character((char) c));
+      buffer = new LinkedList<Character>();
+    buffer.addFirst(Character.valueOf((char) c));
   }
 
   private void pushLastChar(int c) {
     if (buffer == null) {
-      buffer = new LinkedList();
+      buffer = new LinkedList<Character>();
     }
-    buffer.addLast(new Character((char) c));
+    buffer.addLast(Character.valueOf((char) c));
   }
 
   private NormalizeCharMap match(NormalizeCharMap map) throws IOException {
@@ -102,7 +101,7 @@
     if (map.submap != null) {
       int chr = nextChar();
       if (chr != -1) {
-        NormalizeCharMap subMap = (NormalizeCharMap) map.submap.get(CharacterCache.valueOf((char) chr));
+        NormalizeCharMap subMap = map.submap.get(Character.valueOf((char) chr));
         if (subMap != null) {
           result = match(subMap);
         }

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/NormalizeCharMap.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/NormalizeCharMap.java?rev=826404&r1=826403&r2=826404&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/NormalizeCharMap.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/NormalizeCharMap.java Sun Oct 18 10:37:56 2009
@@ -45,10 +45,10 @@
       if (currMap.submap == null) {
         currMap.submap = new HashMap<Character, NormalizeCharMap>(1);
       }
-      NormalizeCharMap map = currMap.submap.get(CharacterCache.valueOf(c));
+      NormalizeCharMap map = currMap.submap.get(Character.valueOf(c));
       if (map == null) {
         map = new NormalizeCharMap();
-        currMap.submap.put(new Character(c), map);
+        currMap.submap.put(Character.valueOf(c), map);
       }
       currMap = map;
     }

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java?rev=826404&r1=826403&r2=826404&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java Sun Oct 18 10:37:56 2009
@@ -58,18 +58,6 @@
   to easily switch from the old to the new TokenStream API.
   
   <br><br>
-
-  <p><b>NOTE:</b> As of 2.3, Token stores the term text
-  internally as a malleable char[] termBuffer instead of
-  String termText.  The indexing code and core tokenizers
-  have been changed to re-use a single Token instance, changing
-  its buffer and other fields in-place as the Token is
-  processed.  This provides substantially better indexing
-  performance as it saves the GC cost of new'ing a Token and
-  String for every term.  The APIs that accept String
-  termText are still available but a warning about the
-  associated performance cost has been added (below).  The
-  {@link #termText()} method has been deprecated.</p>
   
   <p>Tokenizers and filters should try to re-use a Token
   instance when possible for best performance, by
@@ -135,61 +123,13 @@
 
   private static int MIN_BUFFER_SIZE = 10;
 
-  /** @deprecated We will remove this when we remove the
-   * deprecated APIs */
-  private String termText;
-
-  /**
-   * Characters for the term text.
-   * @deprecated This will be made private. Instead, use:
-   * {@link #termBuffer()}, 
-   * {@link #setTermBuffer(char[], int, int)},
-   * {@link #setTermBuffer(String)}, or
-   * {@link #setTermBuffer(String, int, int)}
-   */
-  char[] termBuffer;
-
-  /**
-   * Length of term text in the buffer.
-   * @deprecated This will be made private. Instead, use:
-   * {@link #termLength()}, or @{link setTermLength(int)}.
-   */
-  int termLength;
-
-  /**
-   * Start in source text.
-   * @deprecated This will be made private. Instead, use:
-   * {@link #startOffset()}, or @{link setStartOffset(int)}.
-   */
-  int startOffset;
-
-  /**
-   * End in source text.
-   * @deprecated This will be made private. Instead, use:
-   * {@link #endOffset()}, or @{link setEndOffset(int)}.
-   */
-  int endOffset;
-
-  /**
-   * The lexical type of the token.
-   * @deprecated This will be made private. Instead, use:
-   * {@link #type()}, or @{link setType(String)}.
-   */
-  String type = DEFAULT_TYPE;
-
+  private char[] termBuffer;
+  private int termLength;
+  private int startOffset,endOffset;
+  private String type = DEFAULT_TYPE;
   private int flags;
-  
-  /**
-   * @deprecated This will be made private. Instead, use:
-   * {@link #getPayload()}, or @{link setPayload(Payload)}.
-   */
-  Payload payload;
-  
-  /**
-   * @deprecated This will be made private. Instead, use:
-   * {@link #getPositionIncrement()}, or @{link setPositionIncrement(String)}.
-   */
-  int positionIncrement = 1;
+  private Payload payload;
+  private int positionIncrement = 1;
 
   /** Constructs a Token with null text. */
   public Token() {
@@ -236,10 +176,9 @@
    *  @param text term text
    *  @param start start offset
    *  @param end end offset
-   *  @deprecated Use {@link #Token(char[], int, int, int, int)} instead.
    */
   public Token(String text, int start, int end) {
-    termText = text;
+    setTermBuffer(text);
     startOffset = start;
     endOffset = end;
   }
@@ -252,10 +191,9 @@
    *  @param start start offset
    *  @param end end offset
    *  @param typ token type
-   *  @deprecated Use {@link #Token(char[], int, int, int, int)} and {@link #setType(String)} instead.
    */
   public Token(String text, int start, int end, String typ) {
-    termText = text;
+    setTermBuffer(text);
     startOffset = start;
     endOffset = end;
     type = typ;
@@ -270,10 +208,9 @@
    * @param start
    * @param end
    * @param flags token type bits
-   * @deprecated Use {@link #Token(char[], int, int, int, int)} and {@link #setFlags(int)} instead.
    */
   public Token(String text, int start, int end, int flags) {
-    termText = text;
+    setTermBuffer(text);
     startOffset = start;
     endOffset = end;
     this.flags = flags;
@@ -335,32 +272,6 @@
     return positionIncrement;
   }
 
-  /** Sets the Token's term text.  <b>NOTE:</b> for better
-   *  indexing speed you should instead use the char[]
-   *  termBuffer methods to set the term text.
-   *  @deprecated use {@link #setTermBuffer(char[], int, int)} or
-   *                  {@link #setTermBuffer(String)} or
-   *                  {@link #setTermBuffer(String, int, int)}.
-   */
-  public void setTermText(String text) {
-    termText = text;
-    termBuffer = null;
-  }
-
-  /** Returns the Token's term text.
-   * 
-   * @deprecated This method now has a performance penalty
-   * because the text is stored internally in a char[].  If
-   * possible, use {@link #termBuffer()} and {@link
-   * #termLength()} directly instead.  If you really need a
-   * String, use {@link #term()}</b>
-   */
-  public final String termText() {
-    if (termText == null && termBuffer != null)
-      termText = new String(termBuffer, 0, termLength);
-    return termText;
-  }
-
   /** Returns the Token's term text.
    * 
    * This method has a performance penalty
@@ -371,8 +282,6 @@
    * a convenience call to <b>new String(token.termBuffer(), 0, token.termLength())</b>
    */
   public final String term() {
-    if (termText != null)
-      return termText;
     initTermBuffer();
     return new String(termBuffer, 0, termLength);
   }
@@ -384,7 +293,6 @@
    *  @param length the number of characters to copy
    */
   public final void setTermBuffer(char[] buffer, int offset, int length) {
-    termText = null;
     growTermBuffer(length);
     System.arraycopy(buffer, offset, termBuffer, 0, length);
     termLength = length;
@@ -394,7 +302,6 @@
    *  @param buffer the buffer to copy
    */
   public final void setTermBuffer(String buffer) {
-    termText = null;
     final int length = buffer.length();
     growTermBuffer(length);
     buffer.getChars(0, length, termBuffer, 0);
@@ -410,7 +317,6 @@
   public final void setTermBuffer(String buffer, int offset, int length) {
     assert offset <= buffer.length();
     assert offset + length <= buffer.length();
-    termText = null;
     growTermBuffer(length);
     buffer.getChars(offset, offset + length, termBuffer, 0);
     termLength = length;
@@ -441,17 +347,7 @@
   public char[] resizeTermBuffer(int newSize) {
     if (termBuffer == null) {
       // The buffer is always at least MIN_BUFFER_SIZE
-      newSize = newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize;
-      //Preserve termText 
-      if (termText != null) {
-        final int ttLen = termText.length();
-        newSize = newSize < ttLen ? ttLen : newSize;
-        termBuffer = new char[ArrayUtil.getNextSize(newSize)];
-        termText.getChars(0, termText.length(), termBuffer, 0);
-        termText = null;
-      } else { // no term Text, the first allocation
-        termBuffer = new char[ArrayUtil.getNextSize(newSize)];
-      }    
+      termBuffer = new char[ArrayUtil.getNextSize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize)];
     } else {
       if(termBuffer.length < newSize){
         // Not big enough; create a new array with slight
@@ -481,25 +377,10 @@
     } 
   }
   
-
-  // TODO: once we remove the deprecated termText() method
-  // and switch entirely to char[] termBuffer we don't need
-  // to use this method anymore, only for late init of the buffer
   private void initTermBuffer() {
     if (termBuffer == null) {
-      if (termText == null) {
-        termBuffer = new char[ArrayUtil.getNextSize(MIN_BUFFER_SIZE)];
-        termLength = 0;
-      } else {
-        int length = termText.length();
-        if (length < MIN_BUFFER_SIZE) length = MIN_BUFFER_SIZE;
-        termBuffer = new char[ArrayUtil.getNextSize(length)];
-        termLength = termText.length();
-        termText.getChars(0, termText.length(), termBuffer, 0);
-        termText = null;
-      }
-    } else {
-      termText = null;
+      termBuffer = new char[ArrayUtil.getNextSize(MIN_BUFFER_SIZE)];
+      termLength = 0;
     }
   }
 
@@ -528,7 +409,7 @@
     corresponding to this token in the source text.
 
     Note that the difference between endOffset() and startOffset() may not be
-    equal to termText.length(), as the term text may have been altered by a
+    equal to {@link #termLength}, as the term text may have been altered by a
     stemmer or some other filter. */
   public final int startOffset() {
     return startOffset;
@@ -630,7 +511,6 @@
     payload = null;
     // Leave termBuffer to allow re-use
     termLength = 0;
-    termText = null;
     positionIncrement = 1;
     flags = 0;
     startOffset = endOffset = 0;

Modified: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestToken.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestToken.java?rev=826404&r1=826403&r2=826404&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestToken.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestToken.java Sun Oct 18 10:37:56 2009
@@ -150,7 +150,7 @@
     t.setTermBuffer(b, 0, 5);
     assertEquals("(aloha,0,5)", t.toString());
 
-    t.setTermText("hi there");
+    t.setTermBuffer("hi there");
     assertEquals("(hi there,0,5)", t.toString());
   }
 
@@ -171,20 +171,17 @@
   
   public void testMixedStringArray() throws Exception {
     Token t = new Token("hello", 0, 5);
-    assertEquals(t.termText(), "hello");
     assertEquals(t.termLength(), 5);
     assertEquals(t.term(), "hello");
-    t.setTermText("hello2");
+    t.setTermBuffer("hello2");
     assertEquals(t.termLength(), 6);
     assertEquals(t.term(), "hello2");
     t.setTermBuffer("hello3".toCharArray(), 0, 6);
-    assertEquals(t.termText(), "hello3");
+    assertEquals(t.term(), "hello3");
 
-    // Make sure if we get the buffer and change a character
-    // that termText() reflects the change
     char[] buffer = t.termBuffer();
     buffer[1] = 'o';
-    assertEquals(t.termText(), "hollo3");
+    assertEquals(t.term(), "hollo3");
   }
   
   public void testClone() throws Exception {



Mime
View raw message