db-derby-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From krist...@apache.org
Subject svn commit: r958522 - in /db/derby/code/trunk/java: engine/org/apache/derby/impl/jdbc/ engine/org/apache/derby/loc/ shared/org/apache/derby/shared/common/reference/ testing/org/apache/derbyTesting/perf/basic/jdbc/
Date Mon, 28 Jun 2010 09:53:38 GMT
Author: kristwaa
Date: Mon Jun 28 09:53:38 2010
New Revision: 958522

URL: http://svn.apache.org/viewvc?rev=958522&view=rev
Log:
DERBY-4241: Improve transition from read-only to writable Clob representation

When a store stream Clob is going to be modified, it will be written out to the temporary
area of Derby and represented as a TemporaryClob.
The transfer of the data is done in a sub-optimal manner for two reasons:
 o for transfer of the complete Clob, the copy method operates on the byte level and we're
not able to save the character length.
 o for transfer of parts of the Clob (i.e. truncation), we have to first decode the UTF-8
encoding to find the byte count and then transfer the same bytes.

This fix addresses both issues by improving the length caching and by adding
a method that counts the characters "on-the-fly" while copying raw bytes.

Patch file: derby-4241-2b-utf8AwareCopy.diff

Modified:
    db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/LOBStreamControl.java
    db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/TemporaryClob.java
    db/derby/code/trunk/java/engine/org/apache/derby/loc/messages.xml
    db/derby/code/trunk/java/shared/org/apache/derby/shared/common/reference/MessageId.java
    db/derby/code/trunk/java/testing/org/apache/derbyTesting/perf/basic/jdbc/ClobAccessTest.java

Modified: db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/LOBStreamControl.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/LOBStreamControl.java?rev=958522&r1=958521&r2=958522&view=diff
==============================================================================
--- db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/LOBStreamControl.java (original)
+++ db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/LOBStreamControl.java Mon Jun 28 09:53:38 2010
@@ -26,16 +26,19 @@ import java.io.EOFException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.io.UTFDataFormatException;
 import java.security.AccessController;
 import java.security.PrivilegedActionException;
 import java.security.PrivilegedExceptionAction;
 import org.apache.derby.iapi.error.StandardException;
 import org.apache.derby.iapi.reference.Property;
 import org.apache.derby.iapi.reference.SQLState;
+import org.apache.derby.iapi.services.i18n.MessageService;
 import org.apache.derby.iapi.services.monitor.Monitor;
 import org.apache.derby.iapi.store.raw.data.DataFactory;
 import org.apache.derby.io.StorageFile;
 import org.apache.derby.shared.common.error.ExceptionUtil;
+import org.apache.derby.shared.common.reference.MessageId;
 
 /**
  * This class acts as a layer of blob/clob repository (in memory or file).
@@ -381,8 +384,9 @@ class LOBStreamControl {
             if (len == -1) {
                 if (length != Long.MAX_VALUE) {
                     // We reached EOF before all the requested bytes are read.
-                    throw new EOFException("Reached end-of-stream " +
-                        "prematurely at " + sz + ", expected " + length);
+                    throw new EOFException(MessageService.getTextMessage(
+                            MessageId.STREAM_PREMATURE_EOF,
+                            new Long(length), new Long(sz)));
                 } else {
                     // End of data, but no length checking.
                     break;
@@ -391,9 +395,10 @@ class LOBStreamControl {
             write(data, 0, len, sz);
             sz += len;
         }
-        // If we copied until EOF, see if we have a Derby end-of-stream marker.
-        if (length == Long.MAX_VALUE) {
-            long curLength = getLength();
+        // If we copied until EOF, and we read more data than the length of the
+        // marker, see if we have a Derby end-of-stream marker.
+        long curLength = getLength();
+        if (length == Long.MAX_VALUE && curLength > 2) {
             byte[] eos = new byte[3];
             // Read the three last bytes, marker is 0xE0 0x00 0x00.
             read(eos, 0, 3, curLength -3);
@@ -405,6 +410,83 @@ class LOBStreamControl {
         }
     }
 
+    /**
+     * Copies UTF-8 encoded chars from a stream to local storage.
+     * <p>
+     * Note that specifying the length as {@code Long.MAX_VALUE} results in
+     * reading data from the stream until EOF is reached, but no length checking
+     * will be performed.
+     *
+     * @param utf8Stream the stream to copy from
+     * @param charLength number of chars to be copied, or {@code Long.MAX_VALUE}
+     *      to copy everything until EOF is reached
+     * @return The number of characters copied.
+     * @throws EOFException if EOF is reached prematurely
+     * @throws IOException thrown on a number of error conditions
+     * @throws StandardException if reading, writing or truncating the
+     *      {@code LOBStreamControl}-object fails
+     * @throws UTFDataFormatException if an invalid UTF-8 encoding is detected
+     */
+    synchronized long copyUtf8Data(final InputStream utf8Stream,
+                                   final long charLength)
+            throws IOException, StandardException {
+        long charCount = 0; // Number of chars read
+        int offset = 0;     // Where to start looking for the start of a char
+        int read = 0;       // Number of bytes read
+        final byte[] buf = new byte[bufferSize];
+        while (charCount < charLength) {
+            int readNow = utf8Stream.read(buf, 0,
+                            (int)Math.min(buf.length, charLength - charCount));
+            if (readNow == -1) {
+                break;
+            }
+            // Count the characters.
+            while (offset < readNow) {
+                int c = buf[offset] & 0xFF;
+                if ((c & 0x80) == 0x00) { // 8th bit not set (top bit)
+                    offset++;
+                } else if ((c & 0x60) == 0x40) { // 7th bit set, 6th bit unset
+                    // Found char of two byte width.
+                    offset += 2;
+                } else if ((c & 0x70) == 0x60) { // 7th & 6th bit set, 5th unset
+                    // Found char of three byte width.
+                    offset += 3;
+                } else {
+                    // This shouldn't happen, as the data is coming from the
+                    // store and is supposed to be well-formed.
+                    // If it happens, fail and print some internal information.
+                    throw new UTFDataFormatException("Invalid UTF-8 encoding: "
+                            + Integer.toHexString(c) + ", charCount=" +
+                            charCount + ", offset=" + offset);
+                }
+                charCount++;
+            }
+            offset -= readNow; // Starting offset for next iteration
+            write(buf, 0, readNow, read);
+            read += readNow;
+        }
+        // See if an EOF-marker ended the stream. Don't check if we have fewer
+        // bytes than the marker length.
+        long curLength = getLength();
+        if (curLength > 2) {
+            byte[] eos = new byte[3];
+            // Read the three last bytes, marker is 0xE0 0x00 0x00.
+            read(eos, 0, 3, curLength -3);
+            if ((eos[0] & 0xFF) == 0xE0 && (eos[1] & 0xFF) == 0x00 &&
+                    (eos[2] & 0xFF) == 0x00) {
+                // Remove Derby end-of-stream-marker.
+                truncate(curLength -3);
+                charCount--;
+            }
+        }
+        if (charLength != Long.MAX_VALUE && charCount != charLength) {
+            throw new EOFException(MessageService.getTextMessage(
+                    MessageId.STREAM_PREMATURE_EOF,
+                    new Long(charLength), new Long(charCount)));
+        }
+        return charCount;
+    }
+
     protected void finalize() throws Throwable {
         free();
     }

Modified: db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/TemporaryClob.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/TemporaryClob.java?rev=958522&r1=958521&r2=958522&view=diff
==============================================================================
--- db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/TemporaryClob.java (original)
+++ db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/TemporaryClob.java Mon Jun 28 09:53:38 2010
@@ -495,8 +495,18 @@ final class TemporaryClob implements Int
     private void copyClobContent(InternalClob clob)
             throws IOException, SQLException {
         try {
-            // Specify LONG.MAX_VALUE to copy data until EOF.
-            this.bytes.copyData(clob.getRawByteStream(), Long.MAX_VALUE);
+            long knownLength = clob.getCharLengthIfKnown();
+            if (knownLength == -1) {
+                // Decode UTF-8 data and copy until EOF, obtain char length.
+                this.cachedCharLength = this.bytes.copyUtf8Data(
+                        clob.getRawByteStream(), Long.MAX_VALUE);
+            } else {
+                // We already know the character length, and can copy raw bytes
+                // without decoding the UTF-8 data.
+                // Specify LONG.MAX_VALUE to copy data until EOF.
+                this.cachedCharLength = knownLength;
+                this.bytes.copyData(clob.getRawByteStream(), Long.MAX_VALUE);
+            }
         } catch (StandardException se) {
             throw Util.generateCsSQLException(se);
         }
@@ -515,13 +525,20 @@ final class TemporaryClob implements Int
     private void copyClobContent(InternalClob clob, long charLength)
             throws IOException, SQLException {
         try {
-            long byteLength = UTF8Util.skipFully(
-                    new BufferedInputStream(clob.getRawByteStream()),
-                    charLength);
-            this.bytes.copyData(
-                    new BufferedInputStream(clob.getRawByteStream()),
-                    byteLength);
-            this.cachedCharLength = charLength;
+            long knownLength = clob.getCharLengthIfKnown();
+            if (knownLength > charLength || knownLength == -1) {
+                // Decode and copy the requested number of chars.
+                this.cachedCharLength = this.bytes.copyUtf8Data(
+                    clob.getRawByteStream(), charLength);
+            } else if (knownLength == charLength) {
+                this.cachedCharLength = knownLength;
+                // Copy raw bytes until EOF.
+                // Special case optimization, avoids UTF-8 decoding.
+                this.bytes.copyData(clob.getRawByteStream(), Long.MAX_VALUE);
+            } else {
+                // The known length must be smaller than the requested length.
+                throw new EOFException();
+            }
         } catch (StandardException se) {
             throw Util.generateCsSQLException(se);
         }

Modified: db/derby/code/trunk/java/engine/org/apache/derby/loc/messages.xml
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/engine/org/apache/derby/loc/messages.xml?rev=958522&r1=958521&r2=958522&view=diff
==============================================================================
--- db/derby/code/trunk/java/engine/org/apache/derby/loc/messages.xml (original)
+++ db/derby/code/trunk/java/engine/org/apache/derby/loc/messages.xml Mon Jun 28 09:53:38 2010
@@ -7610,6 +7610,13 @@ Shutting down instance {0} with class lo
                 <text>Stream read error on client side when transferring user data to server.</text>
             </msg>
 
+            <msg>
+                <name>I029</name>
+                <text>Reached EOF prematurely; expected {0}, got {1}.</text>
+                <arg>expectedCount</arg>
+                <arg>gotCount</arg>
+            </msg>
+
         </family>
 
 

Modified: db/derby/code/trunk/java/shared/org/apache/derby/shared/common/reference/MessageId.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/shared/org/apache/derby/shared/common/reference/MessageId.java?rev=958522&r1=958521&r2=958522&view=diff
==============================================================================
--- db/derby/code/trunk/java/shared/org/apache/derby/shared/common/reference/MessageId.java (original)
+++ db/derby/code/trunk/java/shared/org/apache/derby/shared/common/reference/MessageId.java Mon Jun 28 09:53:38 2010
@@ -198,6 +198,8 @@ public interface MessageId {
      * user stream, which it is in the process of sending to the server.
      */
     String STREAM_DRDA_CLIENTSIDE_EXTDTA_READ_ERROR         = "I028";
+    /** The stream ended before it was supposed to. */
+    String STREAM_PREMATURE_EOF                             = "I029";
 
     /*
      * Monitor

Modified: db/derby/code/trunk/java/testing/org/apache/derbyTesting/perf/basic/jdbc/ClobAccessTest.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/testing/org/apache/derbyTesting/perf/basic/jdbc/ClobAccessTest.java?rev=958522&r1=958521&r2=958522&view=diff
==============================================================================
--- db/derby/code/trunk/java/testing/org/apache/derbyTesting/perf/basic/jdbc/ClobAccessTest.java (original)
+++ db/derby/code/trunk/java/testing/org/apache/derbyTesting/perf/basic/jdbc/ClobAccessTest.java Mon Jun 28 09:53:38 2010
@@ -153,6 +153,7 @@ public class ClobAccessTest
                     "testFetchLargeClobPieceByPieceModified",
                     "testLargeClobGetLength",
                     "testLargeClobGetLengthModified",
+                    "testLargeClobTruncateLengthMinusOne",
                     "testFetchLargeClobPieceByPieceBackwards",
                 };
             // See if the user has overridden which tests to run.
@@ -505,6 +506,23 @@ public class ClobAccessTest
     }
 
     /**
+     * Tests the speed of transferring data from the store to local temporary
+     * storage as part of the truncate operation.
+     */
+    public void testLargeClobTruncateLengthMinusOne()
+            throws SQLException {
+        // Select just one Clob.
+        PreparedStatement ps = prepareStatement(
+                "select dClob, length from largeClobs where id = 8");
+        ResultSet rs = ps.executeQuery();
+        while (rs.next()) {
+            Clob clob = rs.getClob(1);
+            int length = rs.getInt(2);
+            clob.truncate(length -1);
+        }
+    }
+
+    /**
      * Runs a test using multiple threads.
      * <p>
      * This test intends to detect problems with small Clobs and general



Mime
View raw message