lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jpou...@apache.org
Subject [2/2] lucene-solr:branch_6x: LUCENE-7329: Simplify CharacterUtils.
Date Mon, 13 Jun 2016 13:34:48 GMT
LUCENE-7329: Simplify CharacterUtils.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/061f6880
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/061f6880
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/061f6880

Branch: refs/heads/branch_6x
Commit: 061f688022debf8db001886bc4e4847cc03c572d
Parents: f1ddc55
Author: Adrien Grand <jpountz@gmail.com>
Authored: Mon Jun 13 15:23:08 2016 +0200
Committer: Adrien Grand <jpountz@gmail.com>
Committed: Mon Jun 13 15:33:57 2016 +0200

----------------------------------------------------------------------
 .../lucene/analysis/core/LowerCaseFilter.java   |   3 +-
 .../lucene/analysis/core/UpperCaseFilter.java   |   3 +-
 .../analysis/el/GreekLowerCaseFilter.java       |   4 +-
 .../analysis/ngram/EdgeNGramTokenFilter.java    |   7 +-
 .../lucene/analysis/ngram/NGramTokenFilter.java |  13 +-
 .../lucene/analysis/ngram/NGramTokenizer.java   |   8 +-
 .../lucene/analysis/util/CharArrayMap.java      |  17 +-
 .../lucene/analysis/util/CharTokenizer.java     |   5 +-
 .../lucene/analysis/util/CharacterUtils.java    | 229 ++++---------------
 .../TestStemmerOverrideFilter.java              |   4 +-
 .../analysis/util/TestCharacterUtils.java       | 155 ++-----------
 .../analysis/morfologik/MorfologikFilter.java   |   4 +-
 12 files changed, 86 insertions(+), 366 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/061f6880/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
index d1198a6..ade6a58 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
@@ -28,7 +28,6 @@ import org.apache.lucene.analysis.util.CharacterUtils;
  * Normalizes token text to lower case.
  */
 public final class LowerCaseFilter extends TokenFilter {
-  private final CharacterUtils charUtils = CharacterUtils.getInstance();
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   
   /**
@@ -43,7 +42,7 @@ public final class LowerCaseFilter extends TokenFilter {
   @Override
   public final boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
-      charUtils.toLowerCase(termAtt.buffer(), 0, termAtt.length());
+      CharacterUtils.toLowerCase(termAtt.buffer(), 0, termAtt.length());
       return true;
     } else
       return false;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/061f6880/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java
index 9c2c283..6d3f6bb 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java
@@ -33,7 +33,6 @@ import org.apache.lucene.analysis.util.CharacterUtils;
  * general search matching
  */
 public final class UpperCaseFilter extends TokenFilter {
-  private final CharacterUtils charUtils = CharacterUtils.getInstance();
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   
   /**
@@ -48,7 +47,7 @@ public final class UpperCaseFilter extends TokenFilter {
   @Override
   public final boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
-      charUtils.toUpperCase(termAtt.buffer(), 0, termAtt.length());
+      CharacterUtils.toUpperCase(termAtt.buffer(), 0, termAtt.length());
       return true;
     } else
       return false;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/061f6880/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
index e4aecf3..3185b2d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
@@ -21,7 +21,6 @@ import java.io.IOException;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharacterUtils;
 
 /**
  * Normalizes token text to lower case, removes some Greek diacritics,
@@ -29,7 +28,6 @@ import org.apache.lucene.analysis.util.CharacterUtils;
  */
 public final class GreekLowerCaseFilter extends TokenFilter {
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-  private final CharacterUtils charUtils = CharacterUtils.getInstance();
 
   /**
    * Create a GreekLowerCaseFilter that normalizes Greek token text.
@@ -47,7 +45,7 @@ public final class GreekLowerCaseFilter extends TokenFilter {
       int chLen = termAtt.length();
       for (int i = 0; i < chLen;) {
         i += Character.toChars(
-            lowerCase(charUtils.codePointAt(chArray, i, chLen)), chArray, i);
+            lowerCase(Character.codePointAt(chArray, i, chLen)), chArray, i);
        }
       return true;
     } else {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/061f6880/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
index 2c10778..827e26f 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
@@ -25,7 +25,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
-import org.apache.lucene.analysis.util.CharacterUtils;
 
 /**
  * Tokenizes the given token into n-grams of given size(s).
@@ -38,7 +37,6 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
   public static final int DEFAULT_MAX_GRAM_SIZE = 1;
   public static final int DEFAULT_MIN_GRAM_SIZE = 1;
 
-  private final CharacterUtils charUtils;
   private final int minGram;
   private final int maxGram;
   private char[] curTermBuffer;
@@ -73,7 +71,6 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
       throw new IllegalArgumentException("minGram must not be greater than maxGram");
     }
 
-    this.charUtils = CharacterUtils.getInstance();
     this.minGram = minGram;
     this.maxGram = maxGram;
   }
@@ -87,7 +84,7 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
         } else {
           curTermBuffer = termAtt.buffer().clone();
           curTermLength = termAtt.length();
-          curCodePointCount = charUtils.codePointCount(termAtt);
+          curCodePointCount = Character.codePointCount(termAtt, 0, termAtt.length());
           curGramSize = minGram;
           tokStart = offsetAtt.startOffset();
           tokEnd = offsetAtt.endOffset();
@@ -108,7 +105,7 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
             posIncrAtt.setPositionIncrement(0);
           }
           posLenAtt.setPositionLength(savePosLen);
-          final int charLength = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curGramSize);
+          final int charLength = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curGramSize);
           termAtt.copyBuffer(curTermBuffer, 0, charLength);
           curGramSize++;
           return true;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/061f6880/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
index 5a84bff..e275cfa 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
@@ -26,7 +26,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
-import org.apache.lucene.analysis.util.CharacterUtils;
 
 /**
  * Tokenizes the input into n-grams of the given size(s).
@@ -56,9 +55,7 @@ public final class NGramTokenFilter extends TokenFilter {
   private int curPosInc, curPosLen;
   private int tokStart;
   private int tokEnd;
-  private boolean hasIllegalOffsets; // only if the length changed before this filter
 
-  private final CharacterUtils charUtils;
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final PositionIncrementAttribute posIncAtt;
   private final PositionLengthAttribute posLenAtt;
@@ -72,7 +69,6 @@ public final class NGramTokenFilter extends TokenFilter {
    */
   public NGramTokenFilter(TokenStream input, int minGram, int maxGram) {
     super(new CodepointCountFilter(input, minGram, Integer.MAX_VALUE));
-    this.charUtils = CharacterUtils.getInstance();
     if (minGram < 1) {
       throw new IllegalArgumentException("minGram must be greater than zero");
     }
@@ -104,16 +100,13 @@ public final class NGramTokenFilter extends TokenFilter {
         } else {
           curTermBuffer = termAtt.buffer().clone();
           curTermLength = termAtt.length();
-          curCodePointCount = charUtils.codePointCount(termAtt);
+          curCodePointCount = Character.codePointCount(termAtt, 0, termAtt.length());
           curGramSize = minGram;
           curPos = 0;
           curPosInc = posIncAtt.getPositionIncrement();
           curPosLen = posLenAtt.getPositionLength();
           tokStart = offsetAtt.startOffset();
           tokEnd = offsetAtt.endOffset();
-          // if length by start + end offsets doesn't match the term text then assume
-          // this is a synonym and don't adjust the offsets.
-          hasIllegalOffsets = (tokStart + curTermLength) != tokEnd;
         }
       }
 
@@ -123,8 +116,8 @@ public final class NGramTokenFilter extends TokenFilter {
       }
       if ((curPos + curGramSize) <= curCodePointCount) {
         clearAttributes();
-        final int start = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curPos);
-        final int end = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
+        final int start = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curPos);
+        final int end = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
         termAtt.copyBuffer(curTermBuffer, start, end - start);
         posIncAtt.setPositionIncrement(curPosInc);
         curPosInc = 0;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/061f6880/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
index 1c8aa7c..da104c9 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
@@ -57,7 +57,6 @@ public class NGramTokenizer extends Tokenizer {
   public static final int DEFAULT_MIN_NGRAM_SIZE = 1;
   public static final int DEFAULT_MAX_NGRAM_SIZE = 2;
 
-  private CharacterUtils charUtils;
   private CharacterUtils.CharacterBuffer charBuffer;
   private int[] buffer; // like charBuffer, but converted to code points
   private int bufferStart, bufferEnd; // remaining slice in buffer
@@ -110,7 +109,6 @@ public class NGramTokenizer extends Tokenizer {
   }
 
   private void init(int minGram, int maxGram, boolean edgesOnly) {
-    charUtils = CharacterUtils.getInstance();
     if (minGram < 1) {
       throw new IllegalArgumentException("minGram must be greater than zero");
     }
@@ -142,9 +140,9 @@ public class NGramTokenizer extends Tokenizer {
         bufferStart = 0;
 
         // fill in remaining space
-        exhausted = !charUtils.fill(charBuffer, input, buffer.length - bufferEnd);
+        exhausted = !CharacterUtils.fill(charBuffer, input, buffer.length - bufferEnd);
         // convert to code points
-        bufferEnd += charUtils.toCodePoints(charBuffer.getBuffer(), 0, charBuffer.getLength(), buffer, bufferEnd);
+        bufferEnd += CharacterUtils.toCodePoints(charBuffer.getBuffer(), 0, charBuffer.getLength(), buffer, bufferEnd);
       }
 
       // should we go to the next offset?
@@ -168,7 +166,7 @@ public class NGramTokenizer extends Tokenizer {
         continue;
       }
 
-      final int length = charUtils.toChars(buffer, bufferStart, gramSize, termAtt.buffer(), 0);
+      final int length = CharacterUtils.toChars(buffer, bufferStart, gramSize, termAtt.buffer(), 0);
       termAtt.setLength(length);
       posIncAtt.setPositionIncrement(1);
       posLenAtt.setPositionLength(1);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/061f6880/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
index 289ee08..e414366 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
@@ -40,7 +40,6 @@ public class CharArrayMap<V> extends AbstractMap<Object,V> {
   private static final CharArrayMap<?> EMPTY_MAP = new EmptyCharArrayMap<>();
 
   private final static int INIT_SIZE = 8;
-  private final CharacterUtils charUtils;
   private boolean ignoreCase;  
   private int count;
   char[][] keys; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
@@ -63,7 +62,6 @@ public class CharArrayMap<V> extends AbstractMap<Object,V> {
       size <<= 1;
     keys = new char[size][];
     values = (V[]) new Object[size];
-    this.charUtils = CharacterUtils.getInstance();
   }
 
   /**
@@ -86,7 +84,6 @@ public class CharArrayMap<V> extends AbstractMap<Object,V> {
     this.values = toCopy.values;
     this.ignoreCase = toCopy.ignoreCase;
     this.count = toCopy.count;
-    this.charUtils = toCopy.charUtils;
   }
   
   /** Clears all entries in this map. This method is supported for reusing, but not {@link Map#remove}. */
@@ -192,7 +189,7 @@ public class CharArrayMap<V> extends AbstractMap<Object,V>
{
    */
   public V put(char[] text, V value) {
     if (ignoreCase) {
-      charUtils.toLowerCase(text, 0, text.length);
+      CharacterUtils.toLowerCase(text, 0, text.length);
     }
     int slot = getSlot(text, 0, text.length);
     if (keys[slot] != null) {
@@ -237,8 +234,8 @@ public class CharArrayMap<V> extends AbstractMap<Object,V>
{
     final int limit = off+len;
     if (ignoreCase) {
       for(int i=0;i<len;) {
-        final int codePointAt = charUtils.codePointAt(text1, off+i, limit);
-        if (Character.toLowerCase(codePointAt) != charUtils.codePointAt(text2, i, text2.length))
+        final int codePointAt = Character.codePointAt(text1, off+i, limit);
+        if (Character.toLowerCase(codePointAt) != Character.codePointAt(text2, i, text2.length))
           return false;
         i += Character.charCount(codePointAt); 
       }
@@ -257,8 +254,8 @@ public class CharArrayMap<V> extends AbstractMap<Object,V>
{
       return false;
     if (ignoreCase) {
       for(int i=0;i<len;) {
-        final int codePointAt = charUtils.codePointAt(text1, i);
-        if (Character.toLowerCase(codePointAt) != charUtils.codePointAt(text2, i, text2.length))
+        final int codePointAt = Character.codePointAt(text1, i);
+        if (Character.toLowerCase(codePointAt) != Character.codePointAt(text2, i, text2.length))
           return false;
         i += Character.charCount(codePointAt);
       }
@@ -278,7 +275,7 @@ public class CharArrayMap<V> extends AbstractMap<Object,V>
{
     final int stop = offset + len;
     if (ignoreCase) {
       for (int i=offset; i<stop;) {
-        final int codePointAt = charUtils.codePointAt(text, i, stop);
+        final int codePointAt = Character.codePointAt(text, i, stop);
         code = code*31 + Character.toLowerCase(codePointAt);
         i += Character.charCount(codePointAt);
       }
@@ -297,7 +294,7 @@ public class CharArrayMap<V> extends AbstractMap<Object,V>
{
     int len = text.length();
     if (ignoreCase) {
       for (int i=0; i<len;) {
-        int codePointAt = charUtils.codePointAt(text, i);
+        int codePointAt = Character.codePointAt(text, i);
         code = code*31 + Character.toLowerCase(codePointAt);
         i += Character.charCount(codePointAt);
       }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/061f6880/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
index 7683239..4952f99 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
@@ -199,7 +199,6 @@ public abstract class CharTokenizer extends Tokenizer {
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
   
-  private final CharacterUtils charUtils = CharacterUtils.getInstance();
   private final CharacterBuffer ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE);
   
   /**
@@ -229,7 +228,7 @@ public abstract class CharTokenizer extends Tokenizer {
     while (true) {
       if (bufferIndex >= dataLen) {
         offset += dataLen;
-        charUtils.fill(ioBuffer, input); // read supplementary char aware with CharacterUtils
+        CharacterUtils.fill(ioBuffer, input); // read supplementary char aware with CharacterUtils
         if (ioBuffer.getLength() == 0) {
           dataLen = 0; // so next offset += dataLen won't decrement offset
           if (length > 0) {
@@ -243,7 +242,7 @@ public abstract class CharTokenizer extends Tokenizer {
         bufferIndex = 0;
       }
       // use CharacterUtils here to support < 3.1 UTF-16 code unit behavior if the char based methods are gone
-      final int c = charUtils.codePointAt(ioBuffer.getBuffer(), bufferIndex, ioBuffer.getLength());
+      final int c = Character.codePointAt(ioBuffer.getBuffer(), bufferIndex, ioBuffer.getLength());
       final int charCount = Character.charCount(c);
       bufferIndex += charCount;
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/061f6880/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
index f14b1f7..b728523 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
@@ -20,76 +20,13 @@ package org.apache.lucene.analysis.util;
 import java.io.IOException;
 import java.io.Reader;
 
-import org.apache.lucene.util.Version;
-
 /**
- * {@link CharacterUtils} provides a unified interface to Character-related
- * operations to implement backwards compatible character operations based on a
- * {@link Version} instance.
- * 
+ * Utility class to write tokenizers or token filters.
  * @lucene.internal
  */
-public abstract class CharacterUtils {
-  private static final Java4CharacterUtils JAVA_4 = new Java4CharacterUtils();
-  private static final Java5CharacterUtils JAVA_5 = new Java5CharacterUtils();
-
-  /**
-   * Returns a {@link CharacterUtils} implementation.
-   * @return a {@link CharacterUtils} implementation according to the given
-   *         {@link Version} instance.
-   */
-  public static CharacterUtils getInstance() {
-    return JAVA_5;
-  }
-  
-  /** 
-   * explicitly returns a version matching java 4 semantics 
-   * @deprecated Only for n-gram backwards compat
-   */
-  @Deprecated
-  public static CharacterUtils getJava4Instance() {
-    return JAVA_4;
-  }
-
-  /**
-   * Returns the code point at the given index of the {@link CharSequence}.
-   * 
-   * @param seq
-   *          a character sequence
-   * @param offset
-   *          the offset to the char values in the chars array to be converted
-   * 
-   * @return the Unicode code point at the given index
-   * @throws NullPointerException
-   *           - if the sequence is null.
-   * @throws IndexOutOfBoundsException
-   *           - if the value offset is negative or not less than the length of
-   *           the character sequence.
-   */
-  public abstract int codePointAt(final CharSequence seq, final int offset);
-  
-  /**
-   * Returns the code point at the given index of the char array where only elements
-   * with index less than the limit are used.
-   * 
-   * @param chars
-   *          a character array
-   * @param offset
-   *          the offset to the char values in the chars array to be converted
-   * @param limit the index afer the last element that should be used to calculate
-   *        codepoint.  
-   * 
-   * @return the Unicode code point at the given index
-   * @throws NullPointerException
-   *           - if the array is null.
-   * @throws IndexOutOfBoundsException
-   *           - if the value offset is negative or not less than the length of
-   *           the char array.
-   */
-  public abstract int codePointAt(final char[] chars, final int offset, final int limit);
+public final class CharacterUtils {
 
-  /** Return the number of characters in <code>seq</code>. */
-  public abstract int codePointCount(CharSequence seq);
+  private CharacterUtils() {} // no instantiation
 
   /**
    * Creates a new {@link CharacterBuffer} and allocates a <code>char[]</code>
@@ -114,13 +51,13 @@ public abstract class CharacterUtils {
    * @param offset the offset to start at
    * @param limit the max char in the buffer to lower case
    */
-  public final void toLowerCase(final char[] buffer, final int offset, final int limit) {
+  public static void toLowerCase(final char[] buffer, final int offset, final int limit) {
     assert buffer.length >= limit;
     assert offset <=0 && offset <= buffer.length;
     for (int i = offset; i < limit;) {
       i += Character.toChars(
               Character.toLowerCase(
-                  codePointAt(buffer, i, limit)), buffer, i);
+                  Character.codePointAt(buffer, i, limit)), buffer, i);
      }
   }
 
@@ -131,25 +68,25 @@ public abstract class CharacterUtils {
    * @param offset the offset to start at
    * @param limit the max char in the buffer to lower case
    */
-  public final void toUpperCase(final char[] buffer, final int offset, final int limit) {
+  public static void toUpperCase(final char[] buffer, final int offset, final int limit) {
     assert buffer.length >= limit;
     assert offset <=0 && offset <= buffer.length;
     for (int i = offset; i < limit;) {
       i += Character.toChars(
               Character.toUpperCase(
-                  codePointAt(buffer, i, limit)), buffer, i);
+                  Character.codePointAt(buffer, i, limit)), buffer, i);
      }
   }
 
   /** Converts a sequence of Java characters to a sequence of unicode code points.
    *  @return the number of code points written to the destination buffer */
-  public final int toCodePoints(char[] src, int srcOff, int srcLen, int[] dest, int destOff) {
+  public static int toCodePoints(char[] src, int srcOff, int srcLen, int[] dest, int destOff) {
     if (srcLen < 0) {
       throw new IllegalArgumentException("srcLen must be >= 0");
     }
     int codePointCount = 0;
     for (int i = 0; i < srcLen; ) {
-      final int cp = codePointAt(src, srcOff + i, srcOff + srcLen);
+      final int cp = Character.codePointAt(src, srcOff + i, srcOff + srcLen);
       final int charCount = Character.charCount(cp);
       dest[destOff + codePointCount++] = cp;
       i += charCount;
@@ -159,7 +96,7 @@ public abstract class CharacterUtils {
 
   /** Converts a sequence of unicode code points to a sequence of Java characters.
    *  @return the number of chars written to the destination buffer */
-  public final int toChars(int[] src, int srcOff, int srcLen, char[] dest, int destOff) {
+  public static int toChars(int[] src, int srcOff, int srcLen, char[] dest, int destOff) {
     if (srcLen < 0) {
       throw new IllegalArgumentException("srcLen must be >= 0");
     }
@@ -202,17 +139,45 @@ public abstract class CharacterUtils {
    * @throws IOException
    *           if the reader throws an {@link IOException}.
    */
-  public abstract boolean fill(CharacterBuffer buffer, Reader reader, int numChars) throws IOException;
+  public static boolean fill(CharacterBuffer buffer, Reader reader, int numChars) throws IOException {
+    assert buffer.buffer.length >= 2;
+    if (numChars < 2 || numChars > buffer.buffer.length) {
+      throw new IllegalArgumentException("numChars must be >= 2 and <= the buffer size");
+    }
+    final char[] charBuffer = buffer.buffer;
+    buffer.offset = 0;
+    final int offset;
+
+    // Install the previously saved ending high surrogate:
+    if (buffer.lastTrailingHighSurrogate != 0) {
+      charBuffer[0] = buffer.lastTrailingHighSurrogate;
+      buffer.lastTrailingHighSurrogate = 0;
+      offset = 1;
+    } else {
+      offset = 0;
+    }
+
+    final int read = readFully(reader, charBuffer, offset, numChars - offset);
+
+    buffer.length = offset + read;
+    final boolean result = buffer.length == numChars;
+    if (buffer.length < numChars) {
+      // We failed to fill the buffer. Even if the last char is a high
+      // surrogate, there is nothing we can do
+      return result;
+    }
+
+    if (Character.isHighSurrogate(charBuffer[buffer.length - 1])) {
+      buffer.lastTrailingHighSurrogate = charBuffer[--buffer.length];
+    }
+    return result;
+  }
 
   /** Convenience method which calls <code>fill(buffer, reader, buffer.buffer.length)</code>. */
-  public final boolean fill(CharacterBuffer buffer, Reader reader) throws IOException {
+  public static boolean fill(CharacterBuffer buffer, Reader reader) throws IOException {
     return fill(buffer, reader, buffer.buffer.length);
   }
 
-  /** Return the index within <code>buf[start:start+count]</code> which is by <code>offset</code>
-   *  code points from <code>index</code>. */
-  public abstract int offsetByCodePoints(char[] buf, int start, int count, int index, int offset);
-
   static int readFully(Reader reader, char[] dest, int offset, int len) throws IOException {
     int read = 0;
     while (read < len) {
@@ -225,112 +190,6 @@ public abstract class CharacterUtils {
     return read;
   }
 
-  private static final class Java5CharacterUtils extends CharacterUtils {
-    Java5CharacterUtils() {
-    }
-
-    @Override
-    public int codePointAt(final CharSequence seq, final int offset) {
-      return Character.codePointAt(seq, offset);
-    }
-
-    @Override
-    public int codePointAt(final char[] chars, final int offset, final int limit) {
-     return Character.codePointAt(chars, offset, limit);
-    }
-
-    @Override
-    public boolean fill(final CharacterBuffer buffer, final Reader reader, int numChars) throws IOException {
-      assert buffer.buffer.length >= 2;
-      if (numChars < 2 || numChars > buffer.buffer.length) {
-        throw new IllegalArgumentException("numChars must be >= 2 and <= the buffer size");
-      }
-      final char[] charBuffer = buffer.buffer;
-      buffer.offset = 0;
-      final int offset;
-
-      // Install the previously saved ending high surrogate:
-      if (buffer.lastTrailingHighSurrogate != 0) {
-        charBuffer[0] = buffer.lastTrailingHighSurrogate;
-        buffer.lastTrailingHighSurrogate = 0;
-        offset = 1;
-      } else {
-        offset = 0;
-      }
-
-      final int read = readFully(reader, charBuffer, offset, numChars - offset);
-
-      buffer.length = offset + read;
-      final boolean result = buffer.length == numChars;
-      if (buffer.length < numChars) {
-        // We failed to fill the buffer. Even if the last char is a high
-        // surrogate, there is nothing we can do
-        return result;
-      }
-
-      if (Character.isHighSurrogate(charBuffer[buffer.length - 1])) {
-        buffer.lastTrailingHighSurrogate = charBuffer[--buffer.length];
-      }
-      return result;
-    }
-
-    @Override
-    public int codePointCount(CharSequence seq) {
-      return Character.codePointCount(seq, 0, seq.length());
-    }
-
-    @Override
-    public int offsetByCodePoints(char[] buf, int start, int count, int index, int offset) {
-      return Character.offsetByCodePoints(buf, start, count, index, offset);
-    }
-  }
-
-  private static final class Java4CharacterUtils extends CharacterUtils {
-    Java4CharacterUtils() {
-    }
-
-    @Override
-    public int codePointAt(final CharSequence seq, final int offset) {
-      return seq.charAt(offset);
-    }
-
-    @Override
-    public int codePointAt(final char[] chars, final int offset, final int limit) {
-      if(offset >= limit)
-        throw new IndexOutOfBoundsException("offset must be less than limit");
-      return chars[offset];
-    }
-
-    @Override
-    public boolean fill(CharacterBuffer buffer, Reader reader, int numChars)
-        throws IOException {
-      assert buffer.buffer.length >= 1;
-      if (numChars < 1 || numChars > buffer.buffer.length) {
-        throw new IllegalArgumentException("numChars must be >= 1 and <= the buffer size");
-      }
-      buffer.offset = 0;
-      final int read = readFully(reader, buffer.buffer, 0, numChars);
-      buffer.length = read;
-      buffer.lastTrailingHighSurrogate = 0;
-      return read == numChars;
-    }
-
-    @Override
-    public int codePointCount(CharSequence seq) {
-      return seq.length();
-    }
-
-    @Override
-    public int offsetByCodePoints(char[] buf, int start, int count, int index, int offset) {
-      final int result = index + offset;
-      if (result < 0 || result > count) {
-        throw new IndexOutOfBoundsException();
-      }
-      return result;
-    }
-
-  }
-  
   /**
    * A simple IO buffer to use with
    * {@link CharacterUtils#fill(CharacterBuffer, Reader)}.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/061f6880/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
index d8b2fca..ef4856c 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
@@ -85,8 +85,6 @@ public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
     int numTerms = atLeast(50);
     boolean ignoreCase = random().nextBoolean();
 
-    CharacterUtils charUtils = CharacterUtils.getInstance();
-
     for (int i = 0; i < numTerms; i++) {
       String randomRealisticUnicodeString = TestUtil
           .randomRealisticUnicodeString(random());
@@ -107,7 +105,7 @@ public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
         if (ignoreCase) {
           // TODO: can we simply use inputValue.toLowerCase(Locale.ROOT)???
           char[] buffer = inputValue.toCharArray();
-          charUtils.toLowerCase(buffer, 0, buffer.length);
+          CharacterUtils.toLowerCase(buffer, 0, buffer.length);
           seenInputValue = buffer.toString();
         } else {
           seenInputValue = inputValue;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/061f6880/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java
index 2faeec7..04e96ea 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java
@@ -32,102 +32,15 @@ import org.junit.Test;
  */
 public class TestCharacterUtils extends LuceneTestCase {
 
-  @Test
-  public void testCodePointAtCharSequenceInt() {
-    CharacterUtils java4 = CharacterUtils.getJava4Instance();
-    String cpAt3 = "Abc\ud801\udc1c";
-    String highSurrogateAt3 = "Abc\ud801";
-    assertEquals((int) 'A', java4.codePointAt(cpAt3, 0));
-    assertEquals((int) '\ud801', java4.codePointAt(cpAt3, 3));
-    assertEquals((int) '\ud801', java4.codePointAt(highSurrogateAt3, 3));
-    expectThrows(IndexOutOfBoundsException.class, () -> {
-      java4.codePointAt(highSurrogateAt3, 4);
-    });
-
-    CharacterUtils java5 = CharacterUtils.getInstance();
-    assertEquals((int) 'A', java5.codePointAt(cpAt3, 0));
-    assertEquals(Character.toCodePoint('\ud801', '\udc1c'), java5.codePointAt(
-        cpAt3, 3));
-    assertEquals((int) '\ud801', java5.codePointAt(highSurrogateAt3, 3));
-    expectThrows(IndexOutOfBoundsException.class, () -> {
-      java5.codePointAt(highSurrogateAt3, 4);
-    });
-  }
-
-  @Test
-  public void testCodePointAtCharArrayIntInt() {
-    CharacterUtils java4 = CharacterUtils.getJava4Instance();
-    char[] cpAt3 = "Abc\ud801\udc1c".toCharArray();
-    char[] highSurrogateAt3 = "Abc\ud801".toCharArray();
-    assertEquals((int) 'A', java4.codePointAt(cpAt3, 0, 2));
-    assertEquals((int) '\ud801', java4.codePointAt(cpAt3, 3, 5));
-    assertEquals((int) '\ud801', java4.codePointAt(highSurrogateAt3, 3, 4));
-
-    CharacterUtils java5 = CharacterUtils.getInstance();
-    assertEquals((int) 'A', java5.codePointAt(cpAt3, 0, 2));
-    assertEquals(Character.toCodePoint('\ud801', '\udc1c'), java5.codePointAt(
-        cpAt3, 3, 5));
-    assertEquals((int) '\ud801', java5.codePointAt(highSurrogateAt3, 3, 4));
-  }
-
-  @Test
-  public void testCodePointCount() {
-    CharacterUtils java4 = CharacterUtils.getJava4Instance();
-    CharacterUtils java5 = CharacterUtils.getInstance();
-    final String s = TestUtil.randomUnicodeString(random());
-    assertEquals(s.length(), java4.codePointCount(s));
-    assertEquals(Character.codePointCount(s, 0, s.length()), java5.codePointCount(s));
-  }
-
-  @Test
-  public void testOffsetByCodePoint() {
-    CharacterUtils java4 = CharacterUtils.getJava4Instance();
-    CharacterUtils java5 = CharacterUtils.getInstance();
-    for (int i = 0; i < 10; ++i) {
-      final char[] s = TestUtil.randomUnicodeString(random()).toCharArray();
-      final int index = TestUtil.nextInt(random(), 0, s.length);
-      final int offset = random().nextInt(7) - 3;
-      try {
-        final int o = java4.offsetByCodePoints(s, 0, s.length, index, offset);
-        assertEquals(o, index + offset);
-      } catch (IndexOutOfBoundsException e) {
-        assertTrue((index + offset) < 0 || (index + offset) > s.length);
-      }
-  
-      int o;
-      try {
-        o = java5.offsetByCodePoints(s, 0, s.length, index, offset);
-      } catch (IndexOutOfBoundsException e) {
-        try {
-          Character.offsetByCodePoints(s, 0, s.length, index, offset);
-          fail();
-        } catch (IndexOutOfBoundsException e2) {
-          // OK
-        }
-        o = -1;
-      }
-      if (o >= 0) {
-        assertEquals(Character.offsetByCodePoints(s, 0, s.length, index, offset), o);
-      }
-    }
-  }
-
   public void testConversions() {
-    CharacterUtils java4 = CharacterUtils.getJava4Instance();
-    CharacterUtils java5 = CharacterUtils.getInstance();
-    testConversions(java4);
-    testConversions(java5);
-  }
-
-  private void testConversions(CharacterUtils charUtils) {
     final char[] orig = TestUtil.randomUnicodeString(random(), 100).toCharArray();
     final int[] buf = new int[orig.length];
     final char[] restored = new char[buf.length];
     final int o1 = TestUtil.nextInt(random(), 0, Math.min(5, orig.length));
     final int o2 = TestUtil.nextInt(random(), 0, o1);
     final int o3 = TestUtil.nextInt(random(), 0, o1);
-    final int codePointCount = charUtils.toCodePoints(orig, o1, orig.length - o1, buf, o2);
-    final int charCount = charUtils.toChars(buf, o2, codePointCount, restored, o3);
+    final int codePointCount = CharacterUtils.toCodePoints(orig, o1, orig.length - o1, buf, o2);
+    final int charCount = CharacterUtils.toChars(buf, o2, codePointCount, restored, o3);
     assertEquals(orig.length - o1, charCount);
     assertArrayEquals(Arrays.copyOfRange(orig, o1, o1 + charCount), Arrays.copyOfRange(restored, o3, o3 + charCount));
   }
@@ -152,71 +65,43 @@ public class TestCharacterUtils extends LuceneTestCase {
 
   @Test
   public void testFillNoHighSurrogate() throws IOException {
-    CharacterUtils versions[] = new CharacterUtils[] { 
-        CharacterUtils.getInstance(), 
-        CharacterUtils.getJava4Instance() };
-    for (CharacterUtils instance : versions) {
-      Reader reader = new StringReader("helloworld");
-      CharacterBuffer buffer = CharacterUtils.newCharacterBuffer(6);
-      assertTrue(instance.fill(buffer,reader));
-      assertEquals(0, buffer.getOffset());
-      assertEquals(6, buffer.getLength());
-      assertEquals("hellow", new String(buffer.getBuffer()));
-      assertFalse(instance.fill(buffer,reader));
-      assertEquals(4, buffer.getLength());
-      assertEquals(0, buffer.getOffset());
+    Reader reader = new StringReader("helloworld");
+    CharacterBuffer buffer = CharacterUtils.newCharacterBuffer(6);
+    assertTrue(CharacterUtils.fill(buffer,reader));
+    assertEquals(0, buffer.getOffset());
+    assertEquals(6, buffer.getLength());
+    assertEquals("hellow", new String(buffer.getBuffer()));
+    assertFalse(CharacterUtils.fill(buffer,reader));
+    assertEquals(4, buffer.getLength());
+    assertEquals(0, buffer.getOffset());
 
-      assertEquals("orld", new String(buffer.getBuffer(), buffer.getOffset(),
-          buffer.getLength()));
-      assertFalse(instance.fill(buffer,reader));
-    }
+    assertEquals("orld", new String(buffer.getBuffer(), buffer.getOffset(),
+        buffer.getLength()));
+    assertFalse(CharacterUtils.fill(buffer,reader));
   }
 
   @Test
-  public void testFillJava15() throws IOException {
+  public void testFill() throws IOException {
     String input = "1234\ud801\udc1c789123\ud801\ud801\udc1c\ud801";
-    CharacterUtils instance = CharacterUtils.getInstance();
     Reader reader = new StringReader(input);
     CharacterBuffer buffer = CharacterUtils.newCharacterBuffer(5);
-    assertTrue(instance.fill(buffer, reader));
+    assertTrue(CharacterUtils.fill(buffer, reader));
     assertEquals(4, buffer.getLength());
     assertEquals("1234", new String(buffer.getBuffer(), buffer.getOffset(),
         buffer.getLength()));
-    assertTrue(instance.fill(buffer, reader));
+    assertTrue(CharacterUtils.fill(buffer, reader));
     assertEquals(5, buffer.getLength());
     assertEquals("\ud801\udc1c789", new String(buffer.getBuffer()));
-    assertTrue(instance.fill(buffer, reader));
+    assertTrue(CharacterUtils.fill(buffer, reader));
     assertEquals(4, buffer.getLength());
     assertEquals("123\ud801", new String(buffer.getBuffer(),
         buffer.getOffset(), buffer.getLength()));
-    assertFalse(instance.fill(buffer, reader));
+    assertFalse(CharacterUtils.fill(buffer, reader));
     assertEquals(3, buffer.getLength());
     assertEquals("\ud801\udc1c\ud801", new String(buffer.getBuffer(), buffer
         .getOffset(), buffer.getLength()));
-    assertFalse(instance.fill(buffer, reader));
+    assertFalse(CharacterUtils.fill(buffer, reader));
     assertEquals(0, buffer.getLength());
   }
 
-  @Test
-  public void testFillJava14() throws IOException {
-    String input = "1234\ud801\udc1c789123\ud801\ud801\udc1c\ud801";
-    CharacterUtils instance = CharacterUtils.getJava4Instance();
-    Reader reader = new StringReader(input);
-    CharacterBuffer buffer = CharacterUtils.newCharacterBuffer(5);
-    assertTrue(instance.fill(buffer, reader));
-    assertEquals(5, buffer.getLength());
-    assertEquals("1234\ud801", new String(buffer.getBuffer(), buffer
-        .getOffset(), buffer.getLength()));
-    assertTrue(instance.fill(buffer, reader));
-    assertEquals(5, buffer.getLength());
-    assertEquals("\udc1c7891", new String(buffer.getBuffer()));
-    buffer = CharacterUtils.newCharacterBuffer(6);
-    assertTrue(instance.fill(buffer, reader));
-    assertEquals(6, buffer.getLength());
-    assertEquals("23\ud801\ud801\udc1c\ud801", new String(buffer.getBuffer(), buffer
-        .getOffset(), buffer.getLength()));
-    assertFalse(instance.fill(buffer, reader));
-
-  }
-
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/061f6880/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
index ff36dbe..3429d86 100644
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
@@ -34,7 +34,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.util.CharacterUtils;
 import org.apache.lucene.util.CharsRefBuilder;
 
 /**
@@ -54,7 +53,6 @@ public class MorfologikFilter extends TokenFilter {
   private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
 
   private final CharsRefBuilder scratch = new CharsRefBuilder();
-  private final CharacterUtils charUtils = CharacterUtils.getInstance();
 
   private State current;
   private final TokenStream input;
@@ -154,7 +152,7 @@ public class MorfologikFilter extends TokenFilter {
     char buffer[] = scratch.chars();
     for (int i = 0; i < length;) {
       i += Character.toChars(
-          Character.toLowerCase(charUtils.codePointAt(chs, i)), buffer, i);      
+          Character.toLowerCase(Character.codePointAt(chs, i)), buffer, i);      
     }
 
     return scratch.get();


Mime
View raw message