lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r895341 - in /lucene/java/trunk: ./ contrib/ contrib/icu/src/java/org/apache/lucene/collation/ src/java/org/apache/lucene/collation/ src/java/org/apache/lucene/util/ src/test/org/apache/lucene/util/
Date Sun, 03 Jan 2010 09:22:41 GMT
Author: rmuir
Date: Sun Jan  3 09:22:40 2010
New Revision: 895341

URL: http://svn.apache.org/viewvc?rev=895341&view=rev
Log:
LUCENE-2084: remove Byte/CharBuffer wrapping for collation key generation

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/contrib/CHANGES.txt
    lucene/java/trunk/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java
    lucene/java/trunk/src/java/org/apache/lucene/collation/CollationKeyFilter.java
    lucene/java/trunk/src/java/org/apache/lucene/util/IndexableBinaryStringTools.java
    lucene/java/trunk/src/test/org/apache/lucene/util/TestIndexableBinaryStringTools.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=895341&r1=895340&r2=895341&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Sun Jan  3 09:22:40 2010
@@ -153,6 +153,11 @@
 * LUCENE-2169: Improved CharArraySet.copy(), if source set is
   also a CharArraySet.  (Simon Willnauer via Uwe Schindler)
 
+* LUCENE-2084: Change IndexableBinaryStringTools to work on byte[] and char[]
+  directly, instead of Byte/CharBuffers, and modify CollationKeyFilter to
+  take advantage of this for faster performance.
+  (Steven Rowe, Uwe Schindler, Robert Muir)
+   
 Build
 
  * LUCENE-2124: Moved the JDK-based collation support from contrib/collation 

Modified: lucene/java/trunk/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/CHANGES.txt?rev=895341&r1=895340&r2=895341&view=diff
==============================================================================
--- lucene/java/trunk/contrib/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/CHANGES.txt Sun Jan  3 09:22:40 2010
@@ -73,6 +73,11 @@
  * LUCENE-2157: DelimitedPayloadTokenFilter no longer copies the buffer
    over itsself. Instead it sets only the length. This patch also optimizes
    the logic of the filter and uses NIO for IdentityEncoder. (Uwe Schindler)
+ 
+ * LUCENE-2084: Change IndexableBinaryStringTools to work on byte[] and char[]
+   directly, instead of Byte/CharBuffers, and modify ICUCollationKeyFilter to
+   take advantage of this for faster performance.
+   (Steven Rowe, Uwe Schindler, Robert Muir)
 
 Test Cases
 

Modified: lucene/java/trunk/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java?rev=895341&r1=895340&r2=895341&view=diff
==============================================================================
--- lucene/java/trunk/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java (original)
+++ lucene/java/trunk/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java Sun Jan  3 09:22:40 2010
@@ -23,13 +23,10 @@
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.util.IndexableBinaryStringTools;
 
 import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.CharBuffer;
 
 
 /**
@@ -92,15 +89,14 @@
       char[] termBuffer = termAtt.termBuffer();
       String termText = new String(termBuffer, 0, termAtt.termLength());
       collator.getRawCollationKey(termText, reusableKey);
-      ByteBuffer collationKeyBuf = ByteBuffer.wrap(reusableKey.bytes, 0, reusableKey.size);
-      int encodedLength
-        = IndexableBinaryStringTools.getEncodedLength(collationKeyBuf);
+      int encodedLength = IndexableBinaryStringTools.getEncodedLength(
+          reusableKey.bytes, 0, reusableKey.size);
       if (encodedLength > termBuffer.length) {
         termAtt.resizeTermBuffer(encodedLength);
       }
       termAtt.setTermLength(encodedLength);
-      CharBuffer wrappedTermBuffer = CharBuffer.wrap(termAtt.termBuffer());
-      IndexableBinaryStringTools.encode(collationKeyBuf, wrappedTermBuffer);
+      IndexableBinaryStringTools.encode(reusableKey.bytes, 0, reusableKey.size,
+          termAtt.termBuffer(), 0, encodedLength);
       return true;
     } else {
       return false;

Modified: lucene/java/trunk/src/java/org/apache/lucene/collation/CollationKeyFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/collation/CollationKeyFilter.java?rev=895341&r1=895340&r2=895341&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/collation/CollationKeyFilter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/collation/CollationKeyFilter.java Sun Jan  3 09:22:40 2010
@@ -24,8 +24,6 @@
 import org.apache.lucene.util.IndexableBinaryStringTools;
 
 import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.CharBuffer;
 import java.text.Collator;
 
 
@@ -94,15 +92,14 @@
       char[] termBuffer = termAtt.termBuffer();
       String termText = new String(termBuffer, 0, termAtt.termLength());
       byte[] collationKey = collator.getCollationKey(termText).toByteArray();
-      ByteBuffer collationKeyBuf = ByteBuffer.wrap(collationKey);
-      int encodedLength
-        = IndexableBinaryStringTools.getEncodedLength(collationKeyBuf);
+      int encodedLength = IndexableBinaryStringTools.getEncodedLength(
+          collationKey, 0, collationKey.length);
       if (encodedLength > termBuffer.length) {
         termAtt.resizeTermBuffer(encodedLength);
       }
       termAtt.setTermLength(encodedLength);
-      CharBuffer wrappedTermBuffer = CharBuffer.wrap(termAtt.termBuffer());
-      IndexableBinaryStringTools.encode(collationKeyBuf, wrappedTermBuffer);
+      IndexableBinaryStringTools.encode(collationKey, 0, collationKey.length,
+          termAtt.termBuffer(), 0, encodedLength);
       return true;
     } else {
       return false;

Modified: lucene/java/trunk/src/java/org/apache/lucene/util/IndexableBinaryStringTools.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/IndexableBinaryStringTools.java?rev=895341&r1=895340&r2=895341&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/util/IndexableBinaryStringTools.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/util/IndexableBinaryStringTools.java Sun Jan  3 09:22:40 2010
@@ -23,29 +23,33 @@
 /**
  * Provides support for converting byte sequences to Strings and back again.
  * The resulting Strings preserve the original byte sequences' sort order.
- * 
+ * <p/>
  * The Strings are constructed using a Base 8000h encoding of the original
  * binary data - each char of an encoded String represents a 15-bit chunk
  * from the byte sequence.  Base 8000h was chosen because it allows for all
  * lower 15 bits of char to be used without restriction; the surrogate range 
  * [U+D8000-U+DFFF] does not represent valid chars, and would require
  * complicated handling to avoid them and allow use of char's high bit.
- * 
+ * <p/>
  * Although unset bits are used as padding in the final char, the original
  * byte sequence could contain trailing bytes with no set bits (null bytes):
  * padding is indistinguishable from valid information.  To overcome this
  * problem, a char is appended, indicating the number of encoded bytes in the
  * final content char.
- * 
- * This class's operations are defined over CharBuffers and ByteBuffers, to
- * allow for wrapped arrays to be reused, reducing memory allocation costs for
- * repeated operations.  Note that this class calls array() and arrayOffset()
+ * <p/>
+ * Some methods in this class are defined over CharBuffers and ByteBuffers, but
+ * these are deprecated in favor of methods that operate directly on byte[] and
+ * char[] arrays.  Note that this class calls array() and arrayOffset()
  * on the CharBuffers and ByteBuffers it uses, so only wrapped arrays may be
- * used.  This class interprets the arrayOffset() and limit() values returned by
- * its input buffers as beginning and end+1 positions on the wrapped array,
+ * used.  This class interprets the arrayOffset() and limit() values returned 
+ * by its input buffers as beginning and end+1 positions on the wrapped array,
  * respectively; similarly, on the output buffer, arrayOffset() is the first
  * position written to, and limit() is set to one past the final output array
  * position.
+ * <p/>
+ * WARNING: This means that the deprecated Buffer-based methods 
+ * only work correctly with buffers that have an offset of 0. For example, they
+ * will not correctly interpret buffers returned by {@link ByteBuffer#slice}.  
  */
 public class IndexableBinaryStringTools {
 
@@ -68,204 +72,276 @@
   /**
    * Returns the number of chars required to encode the given byte sequence.
    * 
-   * @param original The byte sequence to be encoded.  Must be backed by an array.
+   * @param original The byte sequence to be encoded. Must be backed by an
+   *        array.
    * @return The number of chars required to encode the given byte sequence
-   * @throws IllegalArgumentException If the given ByteBuffer is not backed by an array
+   * @throws IllegalArgumentException If the given ByteBuffer is not backed by
+   *         an array
+   * @deprecated Use {@link #getEncodedLength(byte[], int, int)} instead. This
+   *             method will be removed in Lucene 4.0
    */
-  public static int getEncodedLength(ByteBuffer original) 
+  @Deprecated
+  public static int getEncodedLength(ByteBuffer original)
     throws IllegalArgumentException {
     if (original.hasArray()) {
-      // Use long for intermediaries to protect against overflow
-      long length = (long)(original.limit() - original.arrayOffset());
-      return (int)((length * 8L + 14L) / 15L) + 1;
+      return getEncodedLength(original.array(), original.arrayOffset(),
+          original.limit() - original.arrayOffset());
     } else {
       throw new IllegalArgumentException("original argument must have a backing array");
     }
   }
+  
+  /**
+   * Returns the number of chars required to encode the given bytes.
+   * 
+   * @param inputArray byte sequence to be encoded
+   * @param inputOffset initial offset into inputArray
+   * @param inputLength number of bytes in inputArray
+   * @return The number of chars required to encode the given bytes.
+   */
+  public static int getEncodedLength(byte[] inputArray, int inputOffset,
+      int inputLength) {
+    // Use long for intermediaries to protect against overflow
+    return (int)(((long)inputLength * 8L + 14L) / 15L) + 1;
+  }
+
 
   /**
    * Returns the number of bytes required to decode the given char sequence.
    * 
-   * @param encoded The char sequence to be encoded.  Must be backed by an array.
+   * @param encoded The char sequence to be decoded. Must be backed by an array.
    * @return The number of bytes required to decode the given char sequence
-   * @throws IllegalArgumentException If the given CharBuffer is not backed by an array
+   * @throws IllegalArgumentException If the given CharBuffer is not backed by
+   *         an array
+   * @deprecated Use {@link #getDecodedLength(char[], int, int)} instead. This
+   *             method will be removed in Lucene 4.0
    */
+  @Deprecated
   public static int getDecodedLength(CharBuffer encoded) 
     throws IllegalArgumentException {
     if (encoded.hasArray()) {
-      int numChars = encoded.limit() - encoded.arrayOffset() - 1;
-      if (numChars <= 0) {
-        return 0;
-      } else {
-        int numFullBytesInFinalChar = encoded.charAt(encoded.limit() - 1);
-        int numEncodedChars = numChars - 1;
-        return (numEncodedChars * 15 + 7) / 8 + numFullBytesInFinalChar;
-      }
+      return getDecodedLength(encoded.array(), encoded.arrayOffset(), 
+          encoded.limit() - encoded.arrayOffset());
     } else {
       throw new IllegalArgumentException("encoded argument must have a backing array");
     }
   }
+  
+  /**
+   * Returns the number of bytes required to decode the given char sequence.
+   * 
+   * @param encoded char sequence to be decoded
+   * @param offset initial offset
+   * @param length number of characters
+   * @return The number of bytes required to decode the given char sequence
+   */
+  public static int getDecodedLength(char[] encoded, int offset, int length) {
+    final int numChars = length - 1;
+    if (numChars <= 0) {
+      return 0;
+    } else {
+      // Use long for intermediaries to protect against overflow
+      final long numFullBytesInFinalChar = encoded[offset + length - 1];
+      final long numEncodedChars = numChars - 1;
+      return (int)((numEncodedChars * 15L + 7L) / 8L + numFullBytesInFinalChar);
+    }
+  }
 
   /**
-   * Encodes the input byte sequence into the output char sequence.  Before
+   * Encodes the input byte sequence into the output char sequence. Before
    * calling this method, ensure that the output CharBuffer has sufficient
    * capacity by calling {@link #getEncodedLength(java.nio.ByteBuffer)}.
    * 
    * @param input The byte sequence to encode
-   * @param output Where the char sequence encoding result will go.  The limit
-   *  is set to one past the position of the final char.
+   * @param output Where the char sequence encoding result will go. The limit is
+   *        set to one past the position of the final char.
    * @throws IllegalArgumentException If either the input or the output buffer
-   *  is not backed by an array
+   *         is not backed by an array
+   * @deprecated Use {@link #encode(byte[], int, int, char[], int, int)}
+   *             instead. This method will be removed in Lucene 4.0
    */
+  @Deprecated
   public static void encode(ByteBuffer input, CharBuffer output) {
     if (input.hasArray() && output.hasArray()) {
-      byte[] inputArray = input.array();
-      int inputOffset = input.arrayOffset();
-      int inputLength = input.limit() - inputOffset; 
-      char[] outputArray = output.array();
-      int outputOffset = output.arrayOffset();
-      int outputLength = getEncodedLength(input);
-      output.limit(outputOffset + outputLength); // Set output final pos + 1
+      final int inputOffset = input.arrayOffset();
+      final int inputLength = input.limit() - inputOffset;
+      final int outputOffset = output.arrayOffset();
+      final int outputLength = getEncodedLength(input.array(), inputOffset,
+          inputLength);
+      output.limit(outputLength + outputOffset);
       output.position(0);
-      if (inputLength > 0) {
-        int inputByteNum = inputOffset;
-        int caseNum = 0;
-        int outputCharNum = outputOffset;
-        CodingCase codingCase;
-        for ( ; inputByteNum + CODING_CASES[caseNum].numBytes <= inputLength ;
-              ++outputCharNum                                                 ) {
-          codingCase = CODING_CASES[caseNum];
-          if (2 == codingCase.numBytes) {
-            outputArray[outputCharNum]
-              = (char)(((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
-                       + (((inputArray[inputByteNum + 1] & 0xFF) >>> codingCase.finalShift)
-                          & codingCase.finalMask)
-                       & (short)0x7FFF);
-          } else { // numBytes is 3
-            outputArray[outputCharNum] 
-              = (char)(((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
-                       + ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift)
-                       + (((inputArray[inputByteNum + 2] & 0xFF) >>> codingCase.finalShift)

-                          & codingCase.finalMask)
-                       & (short)0x7FFF);          
-          }
-          inputByteNum += codingCase.advanceBytes;          
-          if (++caseNum == CODING_CASES.length) {
-            caseNum = 0;
-          }
-        }
-        // Produce final char (if any) and trailing count chars.
+      encode(input.array(), inputOffset, inputLength, output.array(),
+          outputOffset, outputLength);
+    } else {
+      throw new IllegalArgumentException("Arguments must have backing arrays");
+    }
+  }
+  
+  /**
+   * Encodes the input byte sequence into the output char sequence.  Before
+   * calling this method, ensure that the output array has sufficient
+   * capacity by calling {@link #getEncodedLength(byte[], int, int)}.
+   * 
+   * @param inputArray byte sequence to be encoded
+   * @param inputOffset initial offset into inputArray
+   * @param inputLength number of bytes in inputArray
+   * @param outputArray char sequence to store encoded result
+   * @param outputOffset initial offset into outputArray
+   * @param outputLength length of output, must be getEncodedLength
+   */
+  public static void encode(byte[] inputArray, int inputOffset,
+      int inputLength, char[] outputArray, int outputOffset, int outputLength) {
+    assert (outputLength == getEncodedLength(inputArray, inputOffset,
+        inputLength));
+    if (inputLength > 0) {
+      int inputByteNum = inputOffset;
+      int caseNum = 0;
+      int outputCharNum = outputOffset;
+      CodingCase codingCase;
+      for (; inputByteNum + CODING_CASES[caseNum].numBytes <= inputLength; ++outputCharNum) {
         codingCase = CODING_CASES[caseNum];
-        
-        if (inputByteNum + 1 < inputLength) { // codingCase.numBytes must be 3
-          outputArray[outputCharNum++] 
-            = (char)((((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
-                      + ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift))
-                     & (short)0x7FFF);
-          // Add trailing char containing the number of full bytes in final char
-          outputArray[outputCharNum++] = (char)1;
-        } else if (inputByteNum < inputLength) {
-          outputArray[outputCharNum++] 
-            = (char)(((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
-                     & (short)0x7FFF);
-          // Add trailing char containing the number of full bytes in final char
-          outputArray[outputCharNum++] = caseNum == 0 ? (char)1 : (char)0;
-        } else { // No left over bits - last char is completely filled.
-          // Add trailing char containing the number of full bytes in final char
-          outputArray[outputCharNum++] = (char)1;
+        if (2 == codingCase.numBytes) {
+          outputArray[outputCharNum] = (char) (((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
+              + (((inputArray[inputByteNum + 1] & 0xFF) >>> codingCase.finalShift) & codingCase.finalMask) & (short) 0x7FFF);
+        } else { // numBytes is 3
+          outputArray[outputCharNum] = (char) (((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
+              + ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift)
+              + (((inputArray[inputByteNum + 2] & 0xFF) >>> codingCase.finalShift) & codingCase.finalMask) & (short) 0x7FFF);
+        }
+        inputByteNum += codingCase.advanceBytes;
+        if (++caseNum == CODING_CASES.length) {
+          caseNum = 0;
         }
       }
-    } else {
-      throw new IllegalArgumentException("Arguments must have backing arrays");
+      // Produce final char (if any) and trailing count chars.
+      codingCase = CODING_CASES[caseNum];
+
+      if (inputByteNum + 1 < inputLength) { // codingCase.numBytes must be 3
+        outputArray[outputCharNum++] = (char) ((((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift) + ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift)) & (short) 0x7FFF);
+        // Add trailing char containing the number of full bytes in final char
+        outputArray[outputCharNum++] = (char) 1;
+      } else if (inputByteNum < inputLength) {
+        outputArray[outputCharNum++] = (char) (((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift) & (short) 0x7FFF);
+        // Add trailing char containing the number of full bytes in final char
+        outputArray[outputCharNum++] = caseNum == 0 ? (char) 1 : (char) 0;
+      } else { // No left over bits - last char is completely filled.
+        // Add trailing char containing the number of full bytes in final char
+        outputArray[outputCharNum++] = (char) 1;
+      }
     }
   }
 
   /**
-   * Decodes the input char sequence into the output byte sequence.  Before
+   * Decodes the input char sequence into the output byte sequence. Before
    * calling this method, ensure that the output ByteBuffer has sufficient
    * capacity by calling {@link #getDecodedLength(java.nio.CharBuffer)}.
    * 
    * @param input The char sequence to decode
-   * @param output Where the byte sequence decoding result will go.  The limit
-   *  is set to one past the position of the final char.
+   * @param output Where the byte sequence decoding result will go. The limit is
+   *        set to one past the position of the final char.
    * @throws IllegalArgumentException If either the input or the output buffer
-   *  is not backed by an array
+   *         is not backed by an array
+   * @deprecated Use {@link #decode(char[], int, int, byte[], int, int)}
+   *             instead. This method will be removed in Lucene 4.0
    */
+  @Deprecated
   public static void decode(CharBuffer input, ByteBuffer output) {
     if (input.hasArray() && output.hasArray()) {
-      int numInputChars = input.limit() - input.arrayOffset() - 1;
-      int numOutputBytes = getDecodedLength(input);
-      output.limit(numOutputBytes + output.arrayOffset()); // Set output final pos + 1
+      final int inputOffset = input.arrayOffset();
+      final int inputLength = input.limit() - inputOffset;
+      final int outputOffset = output.arrayOffset();
+      final int outputLength = getDecodedLength(input.array(), inputOffset,
+          inputLength);
+      output.limit(outputLength + outputOffset);
       output.position(0);
-      byte[] outputArray = output.array();
-      char[] inputArray = input.array();
-      if (numOutputBytes > 0) {
-        int caseNum = 0;
-        int outputByteNum = output.arrayOffset();
-        int inputCharNum = input.arrayOffset();
-        short inputChar;
-        CodingCase codingCase;
-        for ( ; inputCharNum < numInputChars - 1 ; ++inputCharNum) {
-          codingCase = CODING_CASES[caseNum];
-          inputChar = (short)inputArray[inputCharNum];
-          if (2 == codingCase.numBytes) {
-            if (0 == caseNum) {
-              outputArray[outputByteNum] = (byte)(inputChar >>> codingCase.initialShift);
-            } else {
-              outputArray[outputByteNum] += (byte)(inputChar >>> codingCase.initialShift);
-            }
-            outputArray[outputByteNum + 1] = (byte)((inputChar & codingCase.finalMask)

-                                                    << codingCase.finalShift);
-          } else { // numBytes is 3
-            outputArray[outputByteNum] += (byte)(inputChar >>> codingCase.initialShift);
-            outputArray[outputByteNum + 1] = (byte)((inputChar & codingCase.middleMask)
-                                                    >>> codingCase.middleShift);
-            outputArray[outputByteNum + 2] = (byte)((inputChar & codingCase.finalMask)

-                                                    << codingCase.finalShift);
-          }
-          outputByteNum += codingCase.advanceBytes;
-          if (++caseNum == CODING_CASES.length) {
-            caseNum = 0;
+      decode(input.array(), inputOffset, inputLength, output.array(),
+          outputOffset, outputLength);
+    } else {
+      throw new IllegalArgumentException("Arguments must have backing arrays");
+    }
+  }
+
+  /**
+   * Decodes the input char sequence into the output byte sequence. Before
+   * calling this method, ensure that the output array has sufficient capacity
+   * by calling {@link #getDecodedLength(char[], int, int)}.
+   * 
+   * @param inputArray char sequence to be decoded
+   * @param inputOffset initial offset into inputArray
+   * @param inputLength number of chars in inputArray
+   * @param outputArray byte sequence to store decoded result
+   * @param outputOffset initial offset into outputArray
+   * @param outputLength length of output, must be
+   *        getDecodedLength(inputArray, inputOffset, inputLength)
+   */
+  public static void decode(char[] inputArray, int inputOffset,
+      int inputLength, byte[] outputArray, int outputOffset, int outputLength) {
+    assert (outputLength == getDecodedLength(inputArray, inputOffset,
+        inputLength));
+    final int numInputChars = inputLength - 1;
+    final int numOutputBytes = outputLength;
+
+    if (numOutputBytes > 0) {
+      int caseNum = 0;
+      int outputByteNum = outputOffset;
+      int inputCharNum = inputOffset;
+      short inputChar;
+      CodingCase codingCase;
+      for (; inputCharNum < numInputChars - 1; ++inputCharNum) {
+        codingCase = CODING_CASES[caseNum];
+        inputChar = (short) inputArray[inputCharNum];
+        if (2 == codingCase.numBytes) {
+          if (0 == caseNum) {
+            outputArray[outputByteNum] = (byte) (inputChar >>> codingCase.initialShift);
+          } else {
+            outputArray[outputByteNum] += (byte) (inputChar >>> codingCase.initialShift);
           }
+          outputArray[outputByteNum + 1] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift);
+        } else { // numBytes is 3
+          outputArray[outputByteNum] += (byte) (inputChar >>> codingCase.initialShift);
+          outputArray[outputByteNum + 1] = (byte) ((inputChar & codingCase.middleMask) >>> codingCase.middleShift);
+          outputArray[outputByteNum + 2] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift);
         }
-        // Handle final char
-        inputChar = (short)inputArray[inputCharNum];
-        codingCase = CODING_CASES[caseNum];
-        if (0 == caseNum) {
-          outputArray[outputByteNum] = 0;
+        outputByteNum += codingCase.advanceBytes;
+        if (++caseNum == CODING_CASES.length) {
+          caseNum = 0;
         }
-        outputArray[outputByteNum] += (byte)(inputChar >>> codingCase.initialShift);
-        int bytesLeft = numOutputBytes - outputByteNum;
-        if (bytesLeft > 1) {
-          if (2 == codingCase.numBytes) {
-            outputArray[outputByteNum + 1] = (byte)((inputChar & codingCase.finalMask)

-                                                    >>> codingCase.finalShift);
-          } else { // numBytes is 3
-            outputArray[outputByteNum + 1] = (byte)((inputChar & codingCase.middleMask)
-                                                    >>> codingCase.middleShift);
-            if (bytesLeft > 2) {
-              outputArray[outputByteNum + 2] = (byte)((inputChar & codingCase.finalMask)

-                                                      << codingCase.finalShift);
-            }
+      }
+      // Handle final char
+      inputChar = (short) inputArray[inputCharNum];
+      codingCase = CODING_CASES[caseNum];
+      if (0 == caseNum) {
+        outputArray[outputByteNum] = 0;
+      }
+      outputArray[outputByteNum] += (byte) (inputChar >>> codingCase.initialShift);
+      final int bytesLeft = numOutputBytes - outputByteNum;
+      if (bytesLeft > 1) {
+        if (2 == codingCase.numBytes) {
+          outputArray[outputByteNum + 1] = (byte) ((inputChar & codingCase.finalMask) >>> codingCase.finalShift);
+        } else { // numBytes is 3
+          outputArray[outputByteNum + 1] = (byte) ((inputChar & codingCase.middleMask) >>> codingCase.middleShift);
+          if (bytesLeft > 2) {
+            outputArray[outputByteNum + 2] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift);
           }
         }
       }
-    } else {
-      throw new IllegalArgumentException("Arguments must have backing arrays");
     }
   }
 
   /**
    * Decodes the given char sequence, which must have been encoded by
-   * {@link #encode(java.nio.ByteBuffer)} or 
+   * {@link #encode(java.nio.ByteBuffer)} or
    * {@link #encode(java.nio.ByteBuffer, java.nio.CharBuffer)}.
    * 
    * @param input The char sequence to decode
-   * @return A byte sequence containing the decoding result.  The limit
-   *  is set to one past the position of the final char.
+   * @return A byte sequence containing the decoding result. The limit is set to
+   *         one past the position of the final char.
    * @throws IllegalArgumentException If the input buffer is not backed by an
-   *  array
+   *         array
+   * @deprecated Use {@link #decode(char[], int, int, byte[], int, int)}
+   *             instead. This method will be removed in Lucene 4.0
    */
+  @Deprecated
   public static ByteBuffer decode(CharBuffer input) {
     byte[] outputArray = new byte[getDecodedLength(input)];
     ByteBuffer output = ByteBuffer.wrap(outputArray);
@@ -277,11 +353,14 @@
    * Encodes the input byte sequence.
    * 
    * @param input The byte sequence to encode
-   * @return A char sequence containing the encoding result.  The limit is set
-   *  to one past the position of the final char.
+   * @return A char sequence containing the encoding result. The limit is set to
+   *         one past the position of the final char.
    * @throws IllegalArgumentException If the input buffer is not backed by an
-   *  array
+   *         array
+   * @deprecated Use {@link #encode(byte[], int, int, char[], int, int)}
+   *             instead. This method will be removed in Lucene 4.0
    */
+  @Deprecated
   public static CharBuffer encode(ByteBuffer input) {
     char[] outputArray = new char[getEncodedLength(input)];
     CharBuffer output = CharBuffer.wrap(outputArray);

Modified: lucene/java/trunk/src/test/org/apache/lucene/util/TestIndexableBinaryStringTools.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/util/TestIndexableBinaryStringTools.java?rev=895341&r1=895340&r2=895341&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/util/TestIndexableBinaryStringTools.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/util/TestIndexableBinaryStringTools.java Sun Jan  3 09:22:40 2010
@@ -25,7 +25,9 @@
   private static final int NUM_RANDOM_TESTS = 2000;
   private static final int MAX_RANDOM_BINARY_LENGTH = 300;
   
-  public void testSingleBinaryRoundTrip() {
+  /** @deprecated remove this test for Lucene 4.0 */
+  @Deprecated
+  public void testSingleBinaryRoundTripNIO() {
     byte[] binary = new byte[] 
       { (byte)0x23, (byte)0x98, (byte)0x13, (byte)0xE4, (byte)0x76, (byte)0x41,
         (byte)0xB2, (byte)0xC9, (byte)0x7F, (byte)0x0A, (byte)0xA6, (byte)0xD8 };
@@ -35,15 +37,44 @@
     ByteBuffer decoded = IndexableBinaryStringTools.decode(encoded);
     assertEquals("Round trip decode/decode returned different results:"
                  + System.getProperty("line.separator")
-                 + "original: " + binaryDump(binaryBuf)
+                 + "original: " + binaryDumpNIO(binaryBuf)
                  + System.getProperty("line.separator")
-                 + " encoded: " + charArrayDump(encoded)
+                 + " encoded: " + charArrayDumpNIO(encoded)
                  + System.getProperty("line.separator")
-                 + " decoded: " + binaryDump(decoded),
+                 + " decoded: " + binaryDumpNIO(decoded),
                  binaryBuf, decoded);
   }
   
-  public void testEncodedSortability() {
+  public void testSingleBinaryRoundTrip() {
+    byte[] binary = new byte[] { (byte) 0x23, (byte) 0x98, (byte) 0x13,
+        (byte) 0xE4, (byte) 0x76, (byte) 0x41, (byte) 0xB2, (byte) 0xC9,
+        (byte) 0x7F, (byte) 0x0A, (byte) 0xA6, (byte) 0xD8 };
+
+    int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0,
+        binary.length);
+    char encoded[] = new char[encodedLen];
+    IndexableBinaryStringTools.encode(binary, 0, binary.length, encoded, 0,
+        encoded.length);
+
+    int decodedLen = IndexableBinaryStringTools.getDecodedLength(encoded, 0,
+        encoded.length);
+    byte decoded[] = new byte[decodedLen];
+    IndexableBinaryStringTools.decode(encoded, 0, encoded.length, decoded, 0,
+        decoded.length);
+
+    assertEquals("Round trip decode/decode returned different results:"
+        + System.getProperty("line.separator") + "original: "
+        + binaryDump(binary, binary.length)
+        + System.getProperty("line.separator") + " encoded: "
+        + charArrayDump(encoded, encoded.length)
+        + System.getProperty("line.separator") + " decoded: "
+        + binaryDump(decoded, decoded.length),
+        binaryDump(binary, binary.length), binaryDump(decoded, decoded.length));
+  }
+  
+  /** @deprecated remove this test for Lucene 4.0 */
+  @Deprecated
+  public void testEncodedSortabilityNIO() {
     Random random = newRandom();
     byte[] originalArray1 = new byte[MAX_RANDOM_BINARY_LENGTH];
     ByteBuffer originalBuf1 = ByteBuffer.wrap(originalArray1);
@@ -88,19 +119,85 @@
       assertEquals("Test #" + (testNum + 1) 
                    + ": Original bytes and encoded chars compare differently:"
                    + System.getProperty("line.separator")
-                   + " binary 1: " + binaryDump(originalBuf1)
+                   + " binary 1: " + binaryDumpNIO(originalBuf1)
                    + System.getProperty("line.separator")
-                   + " binary 2: " + binaryDump(originalBuf2)
+                   + " binary 2: " + binaryDumpNIO(originalBuf2)
                    + System.getProperty("line.separator")
-                   + "encoded 1: " + charArrayDump(encodedBuf1)
+                   + "encoded 1: " + charArrayDumpNIO(encodedBuf1)
                    + System.getProperty("line.separator")
-                   + "encoded 2: " + charArrayDump(encodedBuf2)
+                   + "encoded 2: " + charArrayDumpNIO(encodedBuf2)
                    + System.getProperty("line.separator"),
                    originalComparison, encodedComparison);
     }
   }
-  
-  public void testEmptyInput() {
+
+  public void testEncodedSortability() {
+    Random random = newRandom();
+    byte[] originalArray1 = new byte[MAX_RANDOM_BINARY_LENGTH];
+    char[] originalString1 = new char[MAX_RANDOM_BINARY_LENGTH];
+    char[] encoded1 = new char[MAX_RANDOM_BINARY_LENGTH * 10];
+    byte[] original2 = new byte[MAX_RANDOM_BINARY_LENGTH];
+    char[] originalString2 = new char[MAX_RANDOM_BINARY_LENGTH];
+    char[] encoded2 = new char[MAX_RANDOM_BINARY_LENGTH * 10];
+
+    for (int testNum = 0; testNum < NUM_RANDOM_TESTS; ++testNum) {
+      int numBytes1 = random.nextInt(MAX_RANDOM_BINARY_LENGTH - 1) + 1; // Min == 1
+
+      for (int byteNum = 0; byteNum < numBytes1; ++byteNum) {
+        int randomInt = random.nextInt(0x100);
+        originalArray1[byteNum] = (byte) randomInt;
+        originalString1[byteNum] = (char) randomInt;
+      }
+
+      int numBytes2 = random.nextInt(MAX_RANDOM_BINARY_LENGTH - 1) + 1; // Min == 1
+
+      for (int byteNum = 0; byteNum < numBytes2; ++byteNum) {
+        int randomInt = random.nextInt(0x100);
+        original2[byteNum] = (byte) randomInt;
+        originalString2[byteNum] = (char) randomInt;
+      }
+      int originalComparison = new String(originalString1, 0, numBytes1)
+          .compareTo(new String(originalString2, 0, numBytes2));
+      originalComparison = originalComparison < 0 ? -1
+          : originalComparison > 0 ? 1 : 0;
+
+      int encodedLen1 = IndexableBinaryStringTools.getEncodedLength(
+          originalArray1, 0, numBytes1);
+      if (encodedLen1 > encoded1.length)
+        encoded1 = new char[ArrayUtil.getNextSize(encodedLen1)];
+      IndexableBinaryStringTools.encode(originalArray1, 0, numBytes1, encoded1,
+          0, encodedLen1);
+
+      int encodedLen2 = IndexableBinaryStringTools.getEncodedLength(original2,
+          0, numBytes2);
+      if (encodedLen2 > encoded2.length)
+        encoded2 = new char[ArrayUtil.getNextSize(encodedLen2)];
+      IndexableBinaryStringTools.encode(original2, 0, numBytes2, encoded2, 0,
+          encodedLen2);
+
+      int encodedComparison = new String(encoded1, 0, encodedLen1)
+          .compareTo(new String(encoded2, 0, encodedLen2));
+      encodedComparison = encodedComparison < 0 ? -1
+          : encodedComparison > 0 ? 1 : 0;
+
+      assertEquals("Test #" + (testNum + 1)
+          + ": Original bytes and encoded chars compare differently:"
+          + System.getProperty("line.separator") + " binary 1: "
+          + binaryDump(originalArray1, numBytes1)
+          + System.getProperty("line.separator") + " binary 2: "
+          + binaryDump(original2, numBytes2)
+          + System.getProperty("line.separator") + "encoded 1: "
+          + charArrayDump(encoded1, encodedLen1)
+          + System.getProperty("line.separator") + "encoded 2: "
+          + charArrayDump(encoded2, encodedLen2)
+          + System.getProperty("line.separator"), originalComparison,
+          encodedComparison);
+    }
+  }
+
+  /** @deprecated remove this test for Lucene 4.0 */
+  @Deprecated
+  public void testEmptyInputNIO() {
     byte[] binary = new byte[0];
     CharBuffer encoded = IndexableBinaryStringTools.encode(ByteBuffer.wrap(binary));
     ByteBuffer decoded = IndexableBinaryStringTools.decode(encoded);
@@ -108,7 +205,27 @@
     assertEquals("decoded empty input was not empty", decoded.limit(), 0);
   }
   
-  public void testAllNullInput() {
+  public void testEmptyInput() {
+    byte[] binary = new byte[0];
+
+    int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0,
+        binary.length);
+    char[] encoded = new char[encodedLen];
+    IndexableBinaryStringTools.encode(binary, 0, binary.length, encoded, 0,
+        encoded.length);
+
+    int decodedLen = IndexableBinaryStringTools.getDecodedLength(encoded, 0,
+        encoded.length);
+    byte[] decoded = new byte[decodedLen];
+    IndexableBinaryStringTools.decode(encoded, 0, encoded.length, decoded, 0,
+        decoded.length);
+
+    assertEquals("decoded empty input was not empty", decoded.length, 0);
+  }
+  
+  /** @deprecated remove this test for Lucene 4.0 */
+  @Deprecated
+  public void testAllNullInputNIO() {
     byte[] binary = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
     ByteBuffer binaryBuf = ByteBuffer.wrap(binary);
     CharBuffer encoded = IndexableBinaryStringTools.encode(binaryBuf);
@@ -117,13 +234,38 @@
     assertNotNull("decode() returned null", decodedBuf);
     assertEquals("Round trip decode/decode returned different results:"
                  + System.getProperty("line.separator")
-                 + "  original: " + binaryDump(binaryBuf)
+                 + "  original: " + binaryDumpNIO(binaryBuf)
                  + System.getProperty("line.separator")
-                 + "decodedBuf: " + binaryDump(decodedBuf),
+                 + "decodedBuf: " + binaryDumpNIO(decodedBuf),
                  binaryBuf, decodedBuf);
   }
   
-  public void testRandomBinaryRoundTrip() {
+  public void testAllNullInput() {
+    byte[] binary = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+    int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0,
+        binary.length);
+    char encoded[] = new char[encodedLen];
+    IndexableBinaryStringTools.encode(binary, 0, binary.length, encoded, 0,
+        encoded.length);
+
+    int decodedLen = IndexableBinaryStringTools.getDecodedLength(encoded, 0,
+        encoded.length);
+    byte[] decoded = new byte[decodedLen];
+    IndexableBinaryStringTools.decode(encoded, 0, encoded.length, decoded, 0,
+        decoded.length);
+
+    assertEquals("Round trip decode/decode returned different results:"
+        + System.getProperty("line.separator") + "  original: "
+        + binaryDump(binary, binary.length)
+        + System.getProperty("line.separator") + "decodedBuf: "
+        + binaryDump(decoded, decoded.length),
+        binaryDump(binary, binary.length), binaryDump(decoded, decoded.length));
+  }
+  
+  /** @deprecated remove this test for Lucene 4.0 */
+  @Deprecated
+  public void testRandomBinaryRoundTripNIO() {
     Random random = newRandom();
     byte[] binary = new byte[MAX_RANDOM_BINARY_LENGTH];
     ByteBuffer binaryBuf = ByteBuffer.wrap(binary);
@@ -142,19 +284,59 @@
       assertEquals("Test #" + (testNum + 1) 
                    + ": Round trip decode/decode returned different results:"
                    + System.getProperty("line.separator")
-                   + "  original: " + binaryDump(binaryBuf)
+                   + "  original: " + binaryDumpNIO(binaryBuf)
                    + System.getProperty("line.separator")
-                   + "encodedBuf: " + charArrayDump(encodedBuf)
+                   + "encodedBuf: " + charArrayDumpNIO(encodedBuf)
                    + System.getProperty("line.separator")
-                   + "decodedBuf: " + binaryDump(decodedBuf),
+                   + "decodedBuf: " + binaryDumpNIO(decodedBuf),
                    binaryBuf, decodedBuf);
     }
   }
+
+  public void testRandomBinaryRoundTrip() {
+    Random random = newRandom();
+    byte[] binary = new byte[MAX_RANDOM_BINARY_LENGTH];
+    char[] encoded = new char[MAX_RANDOM_BINARY_LENGTH * 10];
+    byte[] decoded = new byte[MAX_RANDOM_BINARY_LENGTH];
+    for (int testNum = 0; testNum < NUM_RANDOM_TESTS; ++testNum) {
+      int numBytes = random.nextInt(MAX_RANDOM_BINARY_LENGTH - 1) + 1; // Min == 1
+
+      for (int byteNum = 0; byteNum < numBytes; ++byteNum) {
+        binary[byteNum] = (byte) random.nextInt(0x100);
+      }
+
+      int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0,
+          numBytes);
+      if (encoded.length < encodedLen)
+        encoded = new char[ArrayUtil.getNextSize(encodedLen)];
+      IndexableBinaryStringTools.encode(binary, 0, numBytes, encoded, 0,
+          encodedLen);
+
+      int decodedLen = IndexableBinaryStringTools.getDecodedLength(encoded, 0,
+          encodedLen);
+      IndexableBinaryStringTools.decode(encoded, 0, encodedLen, decoded, 0,
+          decodedLen);
+
+      assertEquals("Test #" + (testNum + 1)
+          + ": Round trip decode/decode returned different results:"
+          + System.getProperty("line.separator") + "  original: "
+          + binaryDump(binary, numBytes) + System.getProperty("line.separator")
+          + "encodedBuf: " + charArrayDump(encoded, encodedLen)
+          + System.getProperty("line.separator") + "decodedBuf: "
+          + binaryDump(decoded, decodedLen), binaryDump(binary, numBytes),
+          binaryDump(decoded, decodedLen));
+    }
+  }
   
-  public String binaryDump(ByteBuffer binaryBuf) {
+  /** @deprecated remove this method for Lucene 4.0 */
+  @Deprecated
+  public String binaryDumpNIO(ByteBuffer binaryBuf) {
+    return binaryDump(binaryBuf.array(), 
+        binaryBuf.limit() - binaryBuf.arrayOffset());
+  }
+
+  public String binaryDump(byte[] binary, int numBytes) {
     StringBuilder buf = new StringBuilder();
-    int numBytes = binaryBuf.limit() - binaryBuf.arrayOffset();
-    byte[] binary = binaryBuf.array();
     for (int byteNum = 0 ; byteNum < numBytes ; ++byteNum) {
       String hex = Integer.toHexString((int)binary[byteNum] & 0xFF);
       if (hex.length() == 1) {
@@ -167,11 +349,15 @@
     }
     return buf.toString();
   }
-
-  public String charArrayDump(CharBuffer charBuf) {
+  /** @deprecated remove this method for Lucene 4.0 */
+  @Deprecated
+  public String charArrayDumpNIO(CharBuffer charBuf) {
+    return charArrayDump(charBuf.array(), 
+        charBuf.limit() - charBuf.arrayOffset());
+  }
+  
+  public String charArrayDump(char[] charArray, int numBytes) {
     StringBuilder buf = new StringBuilder();
-    int numBytes = charBuf.limit() - charBuf.arrayOffset();
-    char[] charArray = charBuf.array();
     for (int charNum = 0 ; charNum < numBytes ; ++charNum) {
       String hex = Integer.toHexString((int)charArray[charNum]);
       for (int digit = 0 ; digit < 4 - hex.length() ; ++digit) {



Mime
View raw message