db-derby-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From krist...@apache.org
Subject svn commit: r544111 - in /db/derby/code/trunk/java/engine/org/apache/derby: iapi/util/UTF8Util.java impl/jdbc/InternalClob.java impl/jdbc/StoreStreamClob.java
Date Mon, 04 Jun 2007 09:50:31 GMT
Author: kristwaa
Date: Mon Jun  4 02:49:56 2007
New Revision: 544111

URL: http://svn.apache.org/viewvc?view=rev&rev=544111
Log:
DERBY-2646 (partial): Added StoreStreamClob, UTF8Util and added more JavaDoc for InternalClob.
This code is still not enabled. When it is, it will be used to operate on a Clob represented
as a stream in store.
Patch file: derby-2646-04b-storestreamclob_utf8util.diff

Added:
    db/derby/code/trunk/java/engine/org/apache/derby/iapi/util/UTF8Util.java   (with props)
    db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/StoreStreamClob.java   (with props)
Modified:
    db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/InternalClob.java

Added: db/derby/code/trunk/java/engine/org/apache/derby/iapi/util/UTF8Util.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/engine/org/apache/derby/iapi/util/UTF8Util.java?view=auto&rev=544111
==============================================================================
--- db/derby/code/trunk/java/engine/org/apache/derby/iapi/util/UTF8Util.java (added)
+++ db/derby/code/trunk/java/engine/org/apache/derby/iapi/util/UTF8Util.java Mon Jun  4 02:49:56 2007
@@ -0,0 +1,208 @@
+/*
+
+   Derby - Class org.apache.derby.iapi.util.UTF8Util
+
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+ */
+package org.apache.derby.iapi.util;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.UTFDataFormatException;
+
+/**
+ * Utility methods for handling UTF-8 encoded byte streams.
+ * <p>
+ * Note that when the <code>skip</code> methods mention detection of invalid
+ * UTF-8 encodings, it only checks the first byte of a character. For multibyte
+ * encodings, the second and third byte are not checked for correctness, just
+ * skipped and ignored.
+ *
+ * @see java.io.DataInput
+ */
+//@ThreadSafe
+public final class UTF8Util {
+
+    /** Constant used to look up character count in an array. */
+    private static final int CHAR_COUNT = 0;
+    /** Constant used to look up byte count in an array. */
+    private static final int BYTE_COUNT = 1;
+
+    /** This class cannot be instantiated. */
+    private UTF8Util() {}
+
+    /**
+     * Skip until the end-of-stream is reached.
+     *
+     * @param in byte stream with UTF-8 encoded characters
+     * @return The number of characters skipped.
+     * @throws IOException if reading from the stream fails
+     * @throws UTFDataFormatException if an invalid UTF-8 encoding is detected
+     */
+    public static final long skipUntilEOF(InputStream in)
+            throws IOException {
+        // No need to do the skip in a loop, as InputStream.read() returning -1
+        // means EOF has been reached.
+        // Note that a loop should be used if skip is used instead of read.
+        return internalSkip(in, Long.MAX_VALUE)[CHAR_COUNT];
+    }
+
+    /**
+     * Skip the requested number of characters from the stream.
+     * <p>
+     * @param in byte stream with UTF-8 encoded characters
+     * @param charsToSkip number of characters to skip
+     * @return The number of bytes skipped.
+     * @throws EOFException if end-of-stream is reached before the requested
+     *      number of characters are skipped
+     * @throws IOException if reading from the stream fails
+     * @throws UTFDataFormatException if an invalid UTF-8 encoding is detected
+     */
+    public static final long skipFully(InputStream in, long charsToSkip)
+            throws EOFException, IOException {
+        long[] counts = internalSkip(in, charsToSkip);
+        if (counts[CHAR_COUNT] != charsToSkip) {
+            throw new EOFException("Reached end-of-stream prematurely at " +
+                "character/byte position " + counts[CHAR_COUNT] + "/" +
+                counts[BYTE_COUNT] + ", trying to skip " + charsToSkip);
+        }
+        return counts[BYTE_COUNT];
+    }
+
+    /**
+     * Skip characters in the stream.
+     * <p>
+     * Note that a smaller number than requested might be skipped if the
+     * end-of-stream is reached before the specified number of characters has
+     * been decoded. It is up to the caller to decide if this is an error
+     * or not. For instance, when determining the character length of a stream,
+     * <code>Long.MAX_VALUE</code> could be passed as the requested number of
+     * characters to skip.
+     *
+     * @param in byte stream with UTF-8 encoded characters
+     * @param charsToSkip the number of characters to skip
+     * @return A long array with counts; the characters skipped at position
+     *      <code>CHAR_COUNT</code>, the bytes skipped at position
+     *      <code>BYTE_COUNT</code>. Note that the number of characters skipped
+     *      may be smaller than the requested number.
+     * @throws IOException if reading from the stream fails
+     * @throws UTFDataFormatException if an invalid UTF-8 encoding is detected
+     */
+    private static final long[] internalSkip(final InputStream in,
+                                             final long charsToSkip)
+            throws IOException {
+        long charsSkipped = 0;
+        long bytesSkipped = 0;
+        // Decoding routine for modified UTF-8.
+        // See java.io.DataInput
+        while (charsSkipped < charsToSkip) {
+            int c = in.read();
+            if (c == -1) {
+                break;
+            }
+            charsSkipped++;
+            if ((c & 0x80) == 0x00) { // 8th bit (top bit) unset
+                // Found char of one byte width.
+                bytesSkipped++;
+            } else if ((c & 0x60) == 0x40) { // 7th bit set, 6th bit unset
+                // Found char of two byte width.
+                if (skipPersistent(in, 1L) != 1L) {
+                    // No second byte present.
+                    throw new UTFDataFormatException(
+                        "Second byte in two byte character missing; byte pos " +
+                        bytesSkipped + " ; char pos " + charsSkipped);
+                }
+                bytesSkipped += 2;
+            } else if ((c & 0x70) == 0x60) { // 7th and 6th bit set, 5th unset
+                // Found char of three byte width.
+                int skipped = 0;
+                if (c == 0xe0) {
+                    // Check for Derby EOF marker.
+                    int c1 = in.read();
+                    int c2 = in.read();
+                    if (c1 == 0x00 && c2 == 0x00) {
+                        // Found Derby EOF marker, exit loop.
+                        charsSkipped--; // Compensate by subtracting one.
+                        break;
+                    }
+                    // Do some rudimentary error checking.
+                    // Allow everything except EOF, which is the same as done in
+                    // normal processing (skipPersistent below).
+                    if (c1 != -1 && c2 != -1) {
+                        skipped = 2;
+                    }
+                } else {
+                    skipped = (int)skipPersistent(in, 2L);
+                }
+                if (skipped != 2) {
+                    // No second or third byte present
+                    throw new UTFDataFormatException(
+                        "Second or third byte in three byte character " +
+                        "missing; byte pos " + bytesSkipped + " ; char pos " +
+                        charsSkipped);
+                }
+                bytesSkipped += 3;
+            } else {
+                throw new UTFDataFormatException(
+                    "Invalid UTF-8 encoding encountered: (decimal) " + c);
+            }
+        }
+        // We don't close the stream, since it might be reused. One example of
+        // this is use of Resetable streams.
+        return new long[] {charsSkipped, bytesSkipped};
+    }
+
+    /**
+     * Tries harder to skip the requested number of bytes.
+     * <p>
+     * Note that even if the method fails to skip the requested number of bytes,
+     * it will not throw an exception. If this happens, the caller can be sure
+     * that end-of-stream has been reached.
+     *
+     * @param in byte stream
+     * @param bytesToSkip the number of bytes to skip
+     * @return The number of bytes skipped.
+     * @throws IOException if reading from the stream fails
+     */
+    private static final long skipPersistent(InputStream in, long bytesToSkip)
+            throws IOException {
+        long skipped = 0;
+        while (skipped < bytesToSkip) {
+            long skippedNow = in.skip(bytesToSkip - skipped);
+            if (skippedNow <= 0) {
+                if (in.read() == -1) {
+                    // EOF, return what we have and leave it up to caller to
+                    // decide what to do about it.
+                    break;
+                } else {
+                    skippedNow = 1; // Added to count below.
+                }
+            }
+            skipped += skippedNow;
+        }
+        return skipped;
+    }
+
+    private static final boolean isDerbyEOFMarker(InputStream in)
+            throws IOException {
+        // Expected to have read 224 (0xe0), check if the two next bytes are 0.
+        return (in.read() == 0x00 && in.read() == 0x00);
+    }
+} // End class UTF8Util

Propchange: db/derby/code/trunk/java/engine/org/apache/derby/iapi/util/UTF8Util.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/InternalClob.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/InternalClob.java?view=diff&rev=544111&r1=544110&r2=544111
==============================================================================
--- db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/InternalClob.java (original)
+++ db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/InternalClob.java Mon Jun  4 02:49:56 2007
@@ -58,6 +58,7 @@
      *
      * @param charPos character position. The first position is <code>1</code>.
      * @return A 0-based byte position.
+     * @throws EOFException if the position is bigger than the Clob
      * @throws IOException if accessing the underlying I/O resources fail
      * @throws SQLException if the specified character position is invalid
      */
@@ -79,6 +80,12 @@
      * encoding. There is no predetermined encoding associated with this byte
      * stream, it is up to the Clob representation which one it uses.
      * <p>
+     * This stream may be an internal store stream, and should not be directly
+     * published to the end user (returned through the JDBC API). There are two
+     * motivations for this; the stream may be closed by the end user when it is
+     * not supposed to, and operations on the stream might throw exceptions we
+     * do not want to present to the end user unwrapped.
+     * <p>
      * The primary use of this method is to clone the Clob contents without
      * going via char (or String). Make sure the clone uses the same encoding
      * as the original Clob representation.
@@ -97,6 +104,7 @@
      * @param characterPosition character position. The first character is at
      *      position <code>1</code>.
      * @return A <code>Reader</coder> serving the content of the Clob.
+     * @throws EOFException if the position is larger than the Clob
      * @throws IOException if accessing underlying I/O resources fail
      * @throws SQLException if accessing underlying resources fail
      */

Added: db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/StoreStreamClob.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/StoreStreamClob.java?view=auto&rev=544111
==============================================================================
--- db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/StoreStreamClob.java (added)
+++ db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/StoreStreamClob.java Mon Jun  4 02:49:56 2007
@@ -0,0 +1,346 @@
+/*
+
+   Derby - Class org.apache.derby.impl.jdbc.StoreStreamClob
+
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+ */
+package org.apache.derby.impl.jdbc;
+
+import java.io.BufferedInputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.Writer;
+
+import java.sql.SQLException;
+import org.apache.derby.iapi.error.StandardException;
+import org.apache.derby.iapi.reference.SQLState;
+import org.apache.derby.iapi.types.Resetable;
+import org.apache.derby.iapi.types.TypeId;
+import org.apache.derby.iapi.util.UTF8Util;
+
+/**
+ * A read-only Clob representation operating on streams out of the Derby store
+ * module.
+ * <p>
+ * Note that the streams from the store are expected to have the following
+ * properties:
+ * <ol> <li>The first two bytes are used for length encoding. Note that due to
+ *          the inadequate max number of this format, it is always ignored. This
+ *          is also true if there actually is a length encoded there. The two
+ *          bytes are excluded from the length of the stream.
+ *      <li>A Derby-specific end-of-stream marker at the end of the stream can
+ *          be present. The marker is expected to be <code>0xe0 0x00 0x00</code>
+ * </ol>
+ */
+final class StoreStreamClob
+    implements InternalClob {
+
+    /** Maximum value used when requesting bytes/chars to be skipped. */
+    private static final long SKIP_BUFFER_SIZE = 8*1024; // 8 KB
+
+    /** Tells whether this Clob has been released or not. */
+    private volatile boolean released = false;
+
+    /**
+     * The stream from store, used to read bytes from the database.
+     * <p>
+     * To be able to support the requirements, the stream must implement
+     * {@link Resetable}.
+     */
+    //@GuardedBy("synchronizationObject")
+    private final InputStream storeStream;
+    /** The connection (child) this Clob belongs to. */
+    private final ConnectionChild conChild;
+    /** Object used for synchronizing access to the store stream. */
+    private final Object synchronizationObject;
+
+
+    /**
+     * Creates a new Clob based on a stream from store.
+     * <p>
+     * Note that the stream passed in has to fulfill certain requirements,
+     * which are not currently totally enforced by Java (the language).
+     *
+     * @param stream the stream containing the Clob value. This stream is
+     *      expected to implement {@link Resetable} and to be a
+     *      {@link org.apache.derby.iapi.services.io.FormatIdInputStream} with
+     *      an {@link org.apache.derby.impl.store.raw.data.OverflowInputStream}
+     *      inside. However, the available interfaces do not guarantee this.
+     *      See the class JavaDoc for more information about this stream.
+     * @param conChild the connection (child) this Clob belongs to
+     * @throws StandardException if initializing the store stream fails
+     * @see org.apache.derby.iapi.services.io.FormatIdInputStream
+     * @see org.apache.derby.impl.store.raw.data.OverflowInputStream
+     */
+    public StoreStreamClob(InputStream stream, ConnectionChild conChild)
+            throws StandardException {
+        if (stream == null) {
+            throw new IllegalArgumentException("Stream cannot be <null>");
+        }
+        if (conChild == null) {
+            throw new IllegalArgumentException("conChild cannot be <null>");
+        }
+        if (!(stream instanceof Resetable)) {
+            throw new IllegalArgumentException("Stream must be resetable: " +
+                stream.toString());
+        }
+        this.storeStream = stream;
+        this.conChild = conChild;
+        this.synchronizationObject = conChild.getConnectionSynchronization();
+        ((Resetable)this.storeStream).initStream();
+    }
+
+    /**
+     * Releases resources associated with this Clob.
+     */
+    public void release() {
+        if (!released) {
+            ((Resetable)this.storeStream).closeStream();
+            this.released = true;
+        }
+    }
+
+    /**
+     * Returns the number of bytes in the Clob.
+     *
+     * @return The number of bytes in the Clob.
+     * @throws IOException if accessing the I/O resources fail
+     * @throws SQLException if accessing the store resources fail
+     */
+    public long getByteLength()
+            throws IOException, SQLException {
+        checkIfValid();
+        // Read through the whole stream to get the length.
+        long byteLength = 0;
+        try {
+            this.conChild.setupContextStack();
+            // See if length is encoded in the stream.
+            byteLength = resetStoreStream(true);
+            if (byteLength == 0) {
+                while (true) {
+                    long skipped = this.storeStream.skip(SKIP_BUFFER_SIZE);
+                    if (skipped <= 0) {
+                        break;
+                    }
+                    byteLength += skipped;
+                }
+                // Subtract 3 bytes for the end-of-stream marker.
+                byteLength -= 3;
+            }
+            return byteLength;
+        } finally {
+            this.conChild.restoreContextStack();
+        }
+    }
+
+    /**
+     * Returns the number of characters in the Clob.
+     *
+     * @return Number of characters in the Clob.
+     * @throws SQLException if any kind of error is encountered, be it related
+     *      to I/O or something else
+     */
+    public long getCharLength()
+            throws SQLException {
+        checkIfValid();
+        synchronized (this.synchronizationObject) {
+            this.conChild.setupContextStack();
+            try {
+                return UTF8Util.skipUntilEOF(
+                                new BufferedInputStream(getRawByteStream()));
+            } catch (Throwable t) {
+                throw noStateChangeLOB(t);
+            } finally {
+                this.conChild.restoreContextStack();
+            }
+        }
+    }
+
+    /**
+     * Returns a stream serving the raw bytes of this Clob.
+     * <p>
+     * Note that the stream returned is an internal stream, and it should not be
+     * published to end users.
+     *
+     * @return A stream serving the bytes of this Clob, initialized at byte 0 of
+     *      the data. The stream must be assumed to be unbuffered, but no such
+     *      guarantee is made.
+     * @throws IOException if accessing the I/O resources fail
+     * @throws SQLException if accessing the store resources fail
+     */
+    public InputStream getRawByteStream()
+            throws IOException, SQLException {
+        checkIfValid();
+        resetStoreStream(true);
+        return this.storeStream;
+    }
+
+    /**
+     * Returns a reader for the Clob, initialized at the specified character
+     * position.
+     *
+     * @param pos character position. The first character is at position 1.
+     * @return A reader initialized at the specified position.
+     * @throws EOFException if the position is larger than the Clob
+     * @throws IOException if accessing the I/O resources fail
+     * @throws SQLException if accessing the store resources fail
+     */
+    public Reader getReader(long pos)
+            throws IOException, SQLException  {
+        checkIfValid();
+        resetStoreStream(false);
+        Reader reader = new UTF8Reader(this.storeStream, TypeId.CLOB_MAXWIDTH,
+            this.conChild, this.synchronizationObject);
+        long leftToSkip = pos -1;
+        long skipped;
+        while (leftToSkip > 0) {
+            skipped = reader.skip(leftToSkip);
+            // Since Reader.skip blocks until some characters are available,
+            // a return value of 0 must mean EOF.
+            if (skipped <= 0) {
+                throw new EOFException("Reached end-of-stream prematurely");
+            }
+            leftToSkip -= skipped;
+        }
+        return reader;
+    }
+
+    /**
+     * Returns the byte position for the specified character position.
+     *
+     * @param charPos character position. First character is at position 1.
+     * @return Corresponding byte position. First byte is at position 0.
+     * @throws EOFException if the position is bigger than the Clob
+     * @throws IOException if accessing the underlying I/O resources fail
+     * @throws SQLException if accessing the underlying store resources fail
+     */
+    public long getBytePosition(long charPos)
+            throws IOException, SQLException {
+        return UTF8Util.skipFully(getRawByteStream(), charPos -1);
+    }
+
+    /**
+     * Not supported.
+     *
+     * @see InternalClob#getWriter
+     */
+    public Writer getWriter(long pos) {
+        throw new UnsupportedOperationException(
+            "A StoreStreamClob object is not updatable");
+    }
+
+    /**
+     * Not supported.
+     *
+     * @see InternalClob#insertString
+     */
+    public long insertString(String str, long pos) {
+        throw new UnsupportedOperationException(
+            "A StoreStreamClob object is not updatable");
+    }
+
+    /**
+     * Tells if this Clob can be modified.
+     *
+     * @return <code>false</code>, this Clob is read-only.
+     */
+    public boolean isWritable() {
+        return false;
+    }
+
+    /**
+     * Not supported.
+     *
+     * @see InternalClob#truncate
+     */
+    public void truncate(long newLength) {
+        throw new UnsupportedOperationException(
+            "A StoreStreamClob object is not updatable");
+    }
+
+    /**
+     * Wrap real exception in a {@link SQLException} to avoid changing the state
+     * of the connection child by cleaning it up.
+     *
+     * @param t real cause of error that we want to "ignore" with respect to
+     *      transaction context cleanup
+     * @return A {@link SQLException} wrapped around the real cause of the error
+     */
+    private static SQLException noStateChangeLOB(Throwable t) {
+        if (t instanceof StandardException)
+        {
+            // container closed means the blob or clob was accessed after commit
+            if (((StandardException) t).getMessageId().equals(SQLState.DATA_CONTAINER_CLOSED))
+            {
+                t = StandardException.newException(SQLState.BLOB_ACCESSED_AFTER_COMMIT);
+            }
+        }
+        return org.apache.derby.impl.jdbc.EmbedResultSet.noStateChangeException(t);
+    }
+
+    /**
+     * Makes sure the Clob has not been released.
+     * <p>
+     * All operations are invalid on a released Clob.
+     *
+     * @throws IllegalStateException if the Clob has been released
+     */
+    private void checkIfValid() {
+        if (this.released) {
+            throw new IllegalStateException(
+                "The Clob has been released and is not valid");
+        }
+    }
+
+    /**
+     * Reset the store stream, skipping two bytes of length encoding if
+     * requested.
+     *
+     * @param skipEncodedLength <code>true</code> will cause length encoding to
+     *      be skipped. Note that the length is not always recorded when data is
+     *      written to store, and therefore it is ignored.
+     * @return The length encoded in the stream, or <code>-1</code> if the
+     *      length information is not decoded. A return value of <code>0</code>
+     *      means the stream is ended with a Derby end-of-stream marker.
+     * @throws IOException if skipping the two bytes fails
+     * @throws SQLException if resetting the stream fails in store
+     */
+    private long resetStoreStream(boolean skipEncodedLength)
+            throws IOException, SQLException {
+        try {
+            ((Resetable)this.storeStream).resetStream();
+        } catch (StandardException se) {
+            throw noStateChangeLOB(se);
+        }
+        long encodedLength = -1L;
+        if (skipEncodedLength) {
+            int b1 = this.storeStream.read();
+            int b2 = this.storeStream.read();
+            if (b1 == -1 || b2 == -1) {
+                throw Util.setStreamFailure(
+                    new IOException("Reached end-of-stream prematurely"));
+            }
+            // Length is currently written as an unsigned short.
+            encodedLength = (b1 << 8) + (b2 << 0);
+        }
+        return encodedLength;
+    }
+} // End class StoreStreamClob

Propchange: db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/StoreStreamClob.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message