Return-Path: Delivered-To: apmail-db-derby-commits-archive@www.apache.org Received: (qmail 84043 invoked from network); 8 Jan 2009 10:28:21 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 8 Jan 2009 10:28:21 -0000 Received: (qmail 43849 invoked by uid 500); 8 Jan 2009 10:28:21 -0000 Delivered-To: apmail-db-derby-commits-archive@db.apache.org Received: (qmail 43829 invoked by uid 500); 8 Jan 2009 10:28:21 -0000 Mailing-List: contact derby-commits-help@db.apache.org; run by ezmlm Precedence: bulk list-help: list-unsubscribe: List-Post: Reply-To: "Derby Development" List-Id: Delivered-To: mailing list derby-commits@db.apache.org Received: (qmail 43820 invoked by uid 99); 8 Jan 2009 10:28:21 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 08 Jan 2009 02:28:21 -0800 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 08 Jan 2009 10:28:18 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id B8BC32388896; Thu, 8 Jan 2009 02:27:57 -0800 (PST) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r732676 - in /db/derby/code/trunk/java: engine/org/apache/derby/iapi/types/ReaderToUTF8Stream.java testing/org/apache/derbyTesting/unitTests/junit/UTF8UtilTest.java Date: Thu, 08 Jan 2009 10:27:57 -0000 To: derby-commits@db.apache.org From: kristwaa@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20090108102757.B8BC32388896@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: kristwaa Date: Thu Jan 8 02:27:56 2009 New Revision: 732676 URL: http://svn.apache.org/viewvc?rev=732676&view=rev Log: DERBY-3907: Save useful length information for Clobs in store. 
Cleanup of ReaderToUTF8Stream, which has to deal with the header in the streams being passed in to Derby. Changes: o Simplified constructors. o Added JavaDoc and comments. o Removed unused imports. o Removed instance variable maximumLength. o Added more information to the error messages for truncation. o Added CHAR as a truncatable string data type. o Removed "throws IOException" from close. Updated the test to pass in a valid type name to the constructor. Patch file: derby-3907-3b-readertoutf8stream_cleanup.diff Modified: db/derby/code/trunk/java/engine/org/apache/derby/iapi/types/ReaderToUTF8Stream.java db/derby/code/trunk/java/testing/org/apache/derbyTesting/unitTests/junit/UTF8UtilTest.java Modified: db/derby/code/trunk/java/engine/org/apache/derby/iapi/types/ReaderToUTF8Stream.java URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/engine/org/apache/derby/iapi/types/ReaderToUTF8Stream.java?rev=732676&r1=732675&r2=732676&view=diff ============================================================================== --- db/derby/code/trunk/java/engine/org/apache/derby/iapi/types/ReaderToUTF8Stream.java (original) +++ db/derby/code/trunk/java/engine/org/apache/derby/iapi/types/ReaderToUTF8Stream.java Thu Jan 8 02:27:56 2009 @@ -25,17 +25,20 @@ import java.io.IOException; import java.io.EOFException; import java.io.Reader; -import java.io.UTFDataFormatException; import org.apache.derby.iapi.reference.SQLState; import org.apache.derby.iapi.services.i18n.MessageService; import org.apache.derby.iapi.services.io.DerbyIOException; import org.apache.derby.iapi.services.io.LimitReader; -import org.apache.derby.iapi.types.TypeId; +import org.apache.derby.iapi.services.sanity.SanityManager; /** - Converts a java.io.Reader to the on-disk UTF8 format used by Derby - for character types. -*/ + * Converts the characters served by a {@code java.io.Reader} to a stream + * returning the data in the on-disk modified UTF-8 encoded representation used + * by Derby. + *

+ * Length validation is performed. If required and allowed by the target column + * type, truncation of blanks will also be performed. + */ public final class ReaderToUTF8Stream extends InputStream { @@ -44,33 +47,36 @@ */ private LimitReader reader; - private byte[] buffer; + /** + * Size of buffer to hold the data read from stream and converted to the + * modified UTF-8 format. + */ + private final static int BUFSIZE = 32768; + private byte[] buffer = new byte[BUFSIZE]; private int boff; - private int blen; + private int blen = -1; private boolean eof; + /** Tells if the stream content is/was larger than the buffer size. */ private boolean multipleBuffer; - // buffer to hold the data read from stream - // and converted to UTF8 format - private final static int BUFSIZE = 32768; - /** Number of characters to truncate from this stream - The SQL standard allows for truncation of trailing spaces - for clobs,varchar,char. - If zero, no characters are truncated. + /** + * Number of characters to truncate from this stream. + * The SQL standard allows for truncation of trailing spaces for CLOB, + * VARCHAR and CHAR. If zero, no characters are truncated, unless the + * stream length exceeds the maximum length of the column we are inserting + * into. */ private final int charsToTruncate; private static final char SPACE = ' '; /** - * Length of the final value, after truncation if any, - * in characters. - this stream needs to fit into a column of colWidth - if truncation error happens ,then the error message includes - information about the column width. - */ + * If positive, length of the expected final value, after truncation if any, + * in characters. If negative, the maximum length allowed in the column we + * are inserting into. A negative value means we are working with a stream + * of unknown length, inserted through one of the JDBC 4.0 "lengthless + * override" methods. + */ private final int valueLength; - /** The maximum allowed length of the stream. 
*/ - private final int maximumLength; /** The type name for the column data is inserted into. */ private final String typeName; @@ -83,7 +89,9 @@ * an exception is thrown during read. * * @param appReader application reader - * @param valueLength the length of the reader in characters + * @param valueLength the expected length of the reader in characters + * (positive), or the inverse (maxColWidth * -1) of the maximum column + * width if the expected stream length is unknown * @param numCharsToTruncate the number of trailing blanks to truncate * @param typeName type name of the column data is inserted into */ @@ -93,52 +101,58 @@ String typeName) { this.reader = new LimitReader(appReader); reader.setLimit(valueLength); - buffer = new byte[BUFSIZE]; - blen = -1; this.charsToTruncate = numCharsToTruncate; this.valueLength = valueLength; - this.maximumLength = -1; this.typeName = typeName; + if (SanityManager.DEBUG) { + // Check the type name + // The national types (i.e. NVARCHAR) are not used/supported. + SanityManager.ASSERT(typeName != null && ( + typeName.equals(TypeId.CHAR_NAME) || + typeName.equals(TypeId.VARCHAR_NAME) || + typeName.equals(TypeId.CLOB_NAME)) || + typeName.equals(TypeId.LONGVARCHAR_NAME)); + } } /** - * Create a UTF-8 stream for a length less application reader. - * - * A limit is placed on the length of the reader. If the reader exceeds - * the maximum length, truncation of trailing blanks is attempted. If - * truncation fails, an exception is thrown. + * Creates a UTF-8 stream for an application reader whose length isn't + * known at insertion time. + *

+ * The application reader is coming in through one of the "lengthless + * overrides" added in JDBC 4.0, for instance + * {@link java.sql.PreparedStatement#setCharacterStream(int,Reader)}. + * A limit is placed on the length of the application reader. If the reader + * exceeds the maximum length, truncation of trailing blanks is attempted. + * If truncation fails, an exception is thrown. * * @param appReader application reader * @param maximumLength maximum allowed length in number of characters for - * the reader + * the reader, typically the maximum field size * @param typeName type name of the column data is inserted into - * @throws IllegalArgumentException if maximum length is negative, or type - * name is null + * @throws IllegalArgumentException if maximum length is negative */ public ReaderToUTF8Stream(Reader appReader, int maximumLength, String typeName) { + this(appReader, -1 * maximumLength, 0, typeName); if (maximumLength < 0) { throw new IllegalArgumentException("Maximum length for a capped " + "stream cannot be negative: " + maximumLength); } - if (typeName == null) { - throw new IllegalArgumentException("Type name cannot be null"); - } - this.reader = new LimitReader(appReader); reader.setLimit(maximumLength); - buffer = new byte[BUFSIZE]; - blen = -1; - this.maximumLength = maximumLength; - this.typeName = typeName; - this.charsToTruncate = -1; - this.valueLength = -1; } /** - * read from stream; characters converted to utf-8 derby specific encoding. - * If stream has been read, and eof reached, in that case any subsequent - * read will throw an EOFException + * Reads a byte from the stream. + *

+ * Characters read from the source stream are converted to the UTF-8 Derby + * specific encoding. + * + * @return The byte read, or {@code -1} if the end-of-stream is reached. + * @throws EOFException if the end-of-stream has already been reached or + * the stream has been closed + * @throws IOException if reading from the source stream fails * @see java.io.InputStream#read() */ public int read() throws IOException { @@ -175,6 +189,19 @@ } + /** + * Reads up to {@code len} bytes from the stream. + *

+ * Characters read from the source stream are converted to the UTF-8 Derby + * specific encoding. + * + * @return The number of bytes read, or {@code -1} if the end-of-stream is + * reached. + * @throws EOFException if the end-of-stream has already been reached or + * the stream has been closed + * @throws IOException if reading from the source stream fails + * @see java.io.InputStream#read(byte[],int,int) + */ public int read(byte b[], int off, int len) throws IOException { // when stream has been read and eof reached, stream is closed @@ -230,6 +257,18 @@ return readCount; } + /** + * Fills the internal buffer with data read from the source stream. + *

+ * The characters read from the source are converted to the modified UTF-8 + * encoding, used as the on-disk format by Derby. + * + * @param startingOffset offset at which to start filling the buffer, used + * to avoid overwriting the stream header data on the first iteration + * @throws DerbyIOException if the source stream has an invalid length + * (different than specified), or if truncation of blanks fails + * @throws IOException if reading from the source stream fails + */ private void fillBuffer(int startingOffset) throws IOException { int off = boff = startingOffset; @@ -322,7 +361,10 @@ } else { throw new DerbyIOException( MessageService.getTextMessage( - SQLState.LANG_STRING_TRUNCATION), + SQLState.LANG_STRING_TRUNCATION, + typeName, + "", // Don't show the whole value. + String.valueOf(Math.abs(valueLength))), SQLState.LANG_STRING_TRUNCATION); } } @@ -354,6 +396,8 @@ return true; } else if (typeName.equals(TypeId.VARCHAR_NAME)) { return true; + } else if (typeName.equals(TypeId.CHAR_NAME)) { + return true; } return false; } @@ -374,8 +418,8 @@ MessageService.getTextMessage( SQLState.LANG_STRING_TRUNCATION, typeName, - "XXXX", - String.valueOf(valueLength)), + "", // Don't show the whole value. + String.valueOf(Math.abs(valueLength))), SQLState.LANG_STRING_TRUNCATION); } } @@ -384,8 +428,7 @@ /** * return resources */ - public void close() throws IOException - { + public void close() { // since stream has been read and eof reached, return buffer back to // the vm. // Instead of using another variable to indicate stream is closed @@ -395,8 +438,9 @@ /** * Return an optimized version of bytes available to read from - * the stream - * Note, it is not exactly per java.io.InputStream#available() + * the stream. + *

+ * Note, it is not exactly per {@code java.io.InputStream#available()}. */ public final int available() { @@ -409,4 +453,3 @@ return (BUFSIZE > remainingBytes ? remainingBytes : BUFSIZE); } } - Modified: db/derby/code/trunk/java/testing/org/apache/derbyTesting/unitTests/junit/UTF8UtilTest.java URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/testing/org/apache/derbyTesting/unitTests/junit/UTF8UtilTest.java?rev=732676&r1=732675&r2=732676&view=diff ============================================================================== --- db/derby/code/trunk/java/testing/org/apache/derbyTesting/unitTests/junit/UTF8UtilTest.java (original) +++ db/derby/code/trunk/java/testing/org/apache/derbyTesting/unitTests/junit/UTF8UtilTest.java Thu Jan 8 02:27:56 2009 @@ -59,6 +59,9 @@ public class UTF8UtilTest extends BaseTestCase { + /** Type name passed to {@code ReaderToUTF8Stream}. */ + private static final String TYPENAME = "VARCHAR"; + /** * Creates a test of the specified name. */ @@ -78,7 +81,7 @@ InputStream ascii = new LoopingAlphabetStream(length); InputStream modUTF8 = new ReaderToUTF8Stream( new LoopingAlphabetReader(length), - length, 0, "ignored-test-type"); + length, 0, TYPENAME); modUTF8.skip(2L); // Skip encoded length added by ReaderToUTF8Stream. assertEquals(ascii, modUTF8); } @@ -98,7 +101,7 @@ final int charLength = 5; InputStream in = new ReaderToUTF8Stream( new LoopingAlphabetReader(charLength, CharAlphabet.cjkSubset()), - charLength, 0, "ignored-test-type"); + charLength, 0, TYPENAME); in.skip(2L); // Skip encoded length added by ReaderToUTF8Stream. assertEquals(charLength, UTF8Util.skipUntilEOF(in)); } @@ -114,7 +117,7 @@ final int charLength = 127019; InputStream in = new ReaderToUTF8Stream( new LoopingAlphabetReader(charLength, CharAlphabet.cjkSubset()), - charLength, 0, "ignored-test-type"); + charLength, 0, TYPENAME); in.skip(2L); // Skip encoded length added by ReaderToUTF8Stream. 
assertEquals(charLength, UTF8Util.skipUntilEOF(in)); } @@ -130,7 +133,7 @@ final int charLength = 161019; InputStream in = new ReaderToUTF8Stream( new LoopingAlphabetReader(charLength, CharAlphabet.cjkSubset()), - charLength, 0, "ignored-test-type"); + charLength, 0, TYPENAME); in.skip(2L); // Skip encoded length added by ReaderToUTF8Stream. // Returns count in bytes, we are using CJK chars so multiply length // with 3 to get expected number of bytes. @@ -148,7 +151,7 @@ final int charLength = 161019; InputStream in = new ReaderToUTF8Stream( new LoopingAlphabetReader(charLength, CharAlphabet.cjkSubset()), - charLength, 0, "ignored-test-type"); + charLength, 0, TYPENAME); in.skip(2L); // Skip encoded length added by ReaderToUTF8Stream. try { UTF8Util.skipFully(in, charLength + 100); @@ -169,7 +172,7 @@ final int charLength = 10; InputStream in = new ReaderToUTF8Stream( new LoopingAlphabetReader(charLength, CharAlphabet.cjkSubset()), - charLength, 0, "ignored-test-type"); + charLength, 0, TYPENAME); in.skip(2L); // Skip encoded length added by ReaderToUTF8Stream. in.skip(1L); // Skip one more byte to trigger a UTF error. try { @@ -188,7 +191,7 @@ final int charLength = 161019; InputStream in = new ReaderToUTF8Stream( new LoopingAlphabetReader(charLength, CharAlphabet.tamil()), - charLength, 0, "ignored-test-type"); + charLength, 0, TYPENAME); in.skip(2L); // Skip encoded length added by ReaderToUTF8Stream. int firstSkip = 10078; assertEquals(firstSkip*3, UTF8Util.skipFully(in, firstSkip));