db-derby-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From d..@apache.org
Subject svn commit: r1038440 - in /db/derby/code/trunk/java: engine/org/apache/derby/iapi/util/ engine/org/apache/derby/impl/store/raw/data/ testing/org/apache/derbyTesting/functionTests/tests/store/
Date Wed, 24 Nov 2010 02:00:33 GMT
Author: dag
Date: Wed Nov 24 02:00:32 2010
New Revision: 1038440

URL: http://svn.apache.org/viewvc?rev=1038440&view=rev
Log:
DERBY-4741 Make Derby work reliably in the presence of thread interrupts

Patch derby-4741-b-04-nio, which adds container recovery to NIO
RAFContainer4.  The old test Derby151Test has been decommissioned, and
a new one, InterruptResilienceTest has been added. Currently it only
tests the basic recovery case in a two thread scenario. Should add
more tests. The debug flag "RAF4" can be used with sane builds to
observe recovery events.


Added:
    db/derby/code/trunk/java/engine/org/apache/derby/iapi/util/InterruptDetectedException.java   (with props)
    db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/store/InterruptResilienceTest.java   (with props)
Removed:
    db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/store/Derby151Test.java
Modified:
    db/derby/code/trunk/java/engine/org/apache/derby/iapi/util/InterruptStatus.java
    db/derby/code/trunk/java/engine/org/apache/derby/impl/store/raw/data/FileContainer.java
    db/derby/code/trunk/java/engine/org/apache/derby/impl/store/raw/data/RAFContainer.java
    db/derby/code/trunk/java/engine/org/apache/derby/impl/store/raw/data/RAFContainer4.java
    db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/store/_Suite.java

Added: db/derby/code/trunk/java/engine/org/apache/derby/iapi/util/InterruptDetectedException.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/engine/org/apache/derby/iapi/util/InterruptDetectedException.java?rev=1038440&view=auto
==============================================================================
--- db/derby/code/trunk/java/engine/org/apache/derby/iapi/util/InterruptDetectedException.java (added)
+++ db/derby/code/trunk/java/engine/org/apache/derby/iapi/util/InterruptDetectedException.java Wed Nov 24 02:00:32 2010
@@ -0,0 +1,40 @@
+/*
+
+   Derby - Class org.apache.derby.iapi.util.InterruptDetectedException
+
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to you under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+ */
+
+package org.apache.derby.iapi.util;
+
+import org.apache.derby.iapi.error.StandardException;
+
+
+/**
+    An exception used to pass the fact that an interrupt was detected up
+    through various layers of software, so the operation can be retried.
+*/
+public class InterruptDetectedException extends StandardException {
+
+    /*
+    ** Constructors
+    */
+
+    public InterruptDetectedException() {
+        super("nospc.U");
+    }
+}

Propchange: db/derby/code/trunk/java/engine/org/apache/derby/iapi/util/InterruptDetectedException.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: db/derby/code/trunk/java/engine/org/apache/derby/iapi/util/InterruptStatus.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/engine/org/apache/derby/iapi/util/InterruptStatus.java?rev=1038440&r1=1038439&r2=1038440&view=diff
==============================================================================
--- db/derby/code/trunk/java/engine/org/apache/derby/iapi/util/InterruptStatus.java (original)
+++ db/derby/code/trunk/java/engine/org/apache/derby/iapi/util/InterruptStatus.java Wed Nov 24 02:00:32 2010
@@ -63,7 +63,7 @@ public class InterruptStatus {
 
     /**
      * Make a note that this thread saw an interrupt. Thread's intr
-     * status flag is presumably off alread, but we reset it here
+     * status flag is presumably off already, but we reset it here
      * also. Use lcc if available, else thread local variable.
      */
     public static void setInterrupted() {

Modified: db/derby/code/trunk/java/engine/org/apache/derby/impl/store/raw/data/FileContainer.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/engine/org/apache/derby/impl/store/raw/data/FileContainer.java?rev=1038440&r1=1038439&r2=1038440&view=diff
==============================================================================
--- db/derby/code/trunk/java/engine/org/apache/derby/impl/store/raw/data/FileContainer.java (original)
+++ db/derby/code/trunk/java/engine/org/apache/derby/impl/store/raw/data/FileContainer.java Wed Nov 24 02:00:32 2010
@@ -35,6 +35,9 @@ import org.apache.derby.iapi.services.io
 import org.apache.derby.iapi.services.io.StoredFormatIds;
 import org.apache.derby.iapi.services.io.TypedFormat;
 
+import org.apache.derby.iapi.util.InterruptStatus;
+import org.apache.derby.iapi.util.InterruptDetectedException;
+
 import org.apache.derby.iapi.error.StandardException;
 import org.apache.derby.iapi.store.raw.ContainerHandle;
 import org.apache.derby.iapi.store.raw.ContainerKey;
@@ -1497,6 +1500,9 @@ abstract class FileContainer 
 		}
 	}
 
+    protected final static int INTERRUPT_RETRY_SLEEP = 500; // millis
+    protected final static int MAX_INTERRUPT_RETRIES = 120; // i.e. 60s
+
 	/**
 	  Create a new page in the container.
 
@@ -1559,7 +1565,9 @@ abstract class FileContainer 
 
 		long lastPage;			// last allocated page
 		long lastPreallocPage;	// last pre-allcated page
-		long pageNumber;		// the page number of the new page
+        long pageNumber =
+            ContainerHandle.INVALID_PAGE_NUMBER; // init to appease compiler
+                                // the page number of the new page
 		PageKey pkey;			// the identity of the new page
 		boolean reuse;			// if true, we are trying to reuse a page
 
@@ -1567,7 +1575,10 @@ abstract class FileContainer 
 		/* need to retry a couple of times */
 		boolean retry;
 		int numtries = 0;
-		long startSearch = lastAllocatedPage;
+
+        int maxTries = MAX_INTERRUPT_RETRIES;
+
+        long startSearch = lastAllocatedPage;
 
 		AllocPage allocPage = null;	// the alloc page
 		BasePage page = null;	// the new page
@@ -1632,8 +1643,40 @@ abstract class FileContainer 
                      *
                      * Note that write page can proceed as usual.
                      */
-					allocPage = 
-                        findAllocPageForAdd(allocHandle, ntt, startSearch);
+                    try {
+                        allocPage =
+                            findAllocPageForAdd(allocHandle, ntt, startSearch);
+                    } catch (InterruptDetectedException e) {
+                        // Retry. We needed to back all the way up here in the
+                        // case of the container having been closed due to an
+                        // interrupt on another thread, since that thread's
+                        // recovery needs the monitor to allocCache which we
+                        // hold. We release it when we do "continue" below.
+                        if (--maxTries > 0) {
+                            // Clear firstAllocPageNumber, i.e. undo side
+                            // effect of makeAllocPage, so retry will work
+                            firstAllocPageNumber =
+                                ContainerHandle.INVALID_PAGE_NUMBER;
+                            retry = true;
+
+                            // Wait a bit so recovery can take place before
+                            // we re-grab monitor on allocCache (which recovery
+                            // needs) and retry findAllocPageForAdd.
+                            try {
+                                Thread.sleep(INTERRUPT_RETRY_SLEEP);
+                            } catch (InterruptedException ee) {
+                                // This thread received an interrupt as
+                                // well, make a note.
+                                InterruptStatus.setInterrupted();
+                            }
+
+                            continue;
+                        } else {
+                            throw StandardException.newException(
+                                SQLState.FILE_IO_INTERRUPTED, e);
+                        }
+                    }
+
 
 					allocCache.invalidate(allocPage, allocPage.getPageNumber());
 				}
@@ -2014,13 +2057,48 @@ abstract class FileContainer 
 		 throws StandardException
 	{
 		boolean retval = false;
+        boolean done;
+        int maxTries = MAX_INTERRUPT_RETRIES;
 
-		synchronized(allocCache)
-		{
-			if (pagenum <= allocCache.getLastPageNumber(handle, firstAllocPageNumber) && 
-				allocCache.getPageStatus(handle, pagenum, firstAllocPageNumber) == AllocExtent.ALLOCATED_PAGE)
-				retval = true;
-		}
+        do {
+            done = true;
+            synchronized(allocCache) {
+                try {
+                    if (pagenum <= allocCache.getLastPageNumber(
+                                handle, firstAllocPageNumber) &&
+                            (allocCache.getPageStatus(
+                                handle, pagenum, firstAllocPageNumber) ==
+                                 AllocExtent.ALLOCATED_PAGE)) {
+
+                        retval = true;
+                    }
+                } catch (InterruptDetectedException e) {
+                    // Retry. We needed to back all the way up here in the case
+                    // of the (file) container having been closed due to an
+                    // interrupt since the recovery needs the monitor to
+                    // allocCache
+                    if (--maxTries > 0) {
+                        done = false;
+
+                        // Wait a bit so recovery can take place before
+                        // we re-grab monitor on allocCache (which recovery
+                        // needs) and retry the page status check.
+                        try {
+                            Thread.sleep(INTERRUPT_RETRY_SLEEP);
+                        } catch (InterruptedException ee) {
+                            // This thread received an interrupt as
+                            // well, make a note.
+                            InterruptStatus.setInterrupted();
+                        }
+
+                        continue;
+                    } else {
+                        throw StandardException.newException(
+                            SQLState.FILE_IO_INTERRUPTED, e);
+                    }
+                }
+            }
+        } while (!done);
 
 		return retval;
 	}

Modified: db/derby/code/trunk/java/engine/org/apache/derby/impl/store/raw/data/RAFContainer.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/engine/org/apache/derby/impl/store/raw/data/RAFContainer.java?rev=1038440&r1=1038439&r2=1038440&view=diff
==============================================================================
--- db/derby/code/trunk/java/engine/org/apache/derby/impl/store/raw/data/RAFContainer.java (original)
+++ db/derby/code/trunk/java/engine/org/apache/derby/impl/store/raw/data/RAFContainer.java Wed Nov 24 02:00:32 2010
@@ -33,6 +33,9 @@ import org.apache.derby.iapi.services.di
 import org.apache.derby.iapi.services.sanity.SanityManager;
 import org.apache.derby.iapi.services.io.FormatIdUtil;
 
+import org.apache.derby.iapi.util.InterruptStatus;
+import org.apache.derby.iapi.util.InterruptDetectedException;
+
 import org.apache.derby.iapi.error.StandardException;
 
 import org.apache.derby.iapi.store.raw.ContainerHandle;
@@ -84,6 +87,14 @@ class RAFContainer extends FileContainer
 	private static final int BACKUP_CONTAINER_ACTION = 6;
     private static final int GET_RANDOM_ACCESS_FILE_ACTION = 7;
     private ContainerKey actionIdentity;
+
+    /**
+     * Identity of this container. Make it visible to RAFContainer4, which may
+     * need to reopen the container after interrupts due to a NIO channel being
+     * closed by the interrupt.
+     */
+    protected ContainerKey currentIdentity;
+
     private boolean actionStub;
     private boolean actionErrorOK;
     private boolean actionTryAlternatePath;
@@ -177,7 +188,7 @@ class RAFContainer extends FileContainer
 					}
 					catch (InterruptedException ie)
 					{
-						throw StandardException.interrupt(ie);
+                        InterruptStatus.setInterrupted();
 					}	
 				}
 			}
@@ -477,80 +488,114 @@ class RAFContainer extends FileContainer
 	{
 		boolean waited = false;
 
-		synchronized (this) {
+        // If interrupt recovery is in progress (NIO), we must expect to
+        // release our monitor on "this" and to retry writeRAFHeader, so be
+        // prepared to retry.
+        boolean success = false;
+        int maxTries = MAX_INTERRUPT_RETRIES; // ca 60s = (120 * 0.5s)
 
-			// committed and dropped, do nothing.  
-			// This file container has already been stubbified
-			if (getCommittedDropState()) {
-				clearDirty();
-				return;
-			}
+        while (!success) {
+            success = true;
 
-			// The container is about to change, need to wait till it is really
-			// changed.  We are in the predirty state only for the duration
-			// where the log record that changed the container has been sent to
-			// the log and before the change actually happened.
-			while(preDirty == true)
-			{
-				waited = true;
-				try
-				{
-					wait();
-				}
-				catch (InterruptedException ie)
-				{
-					throw StandardException.interrupt(ie);
-				}
-			}
+            synchronized (this) {
 
-			if (waited)
-			{
-				// someone else may have stubbified this while we waited 
-				if (getCommittedDropState())
-				{
-					clearDirty();
-					return;
-				}
-			}
+                // committed and dropped, do nothing.
+                // This file container has already been stubbified
+                if (getCommittedDropState()) {
+                    clearDirty();
+                    return;
+                }
+
+                // The container is about to change, need to wait till it is
+                // really changed.  We are in the predirty state only for the
+                // duration where the log record that changed the container has
+                // been sent to the log and before the change actually
+                // happened.
+                while(preDirty == true)
+                {
+                    waited = true;
+                    try
+                    {
+                        wait();
+                    }
+                    catch (InterruptedException ie)
+                    {
+                        InterruptStatus.setInterrupted();
+                    }
+                }
 
+                if (waited)
+                {
+                    // someone else may have stubbified this while we waited
+                    if (getCommittedDropState())
+                    {
+                        clearDirty();
+                        return;
+                    }
+                }
 
-			if (forRemove) {
 
-				//				removeFile()
-				//				clearDirty();
+                if (forRemove) {
 
-			} else if (isDirty()) {
- 
-				try {
+                    //              removeFile()
+                    //              clearDirty();
 
-					// Cannot get the alloc page and write it out
-					// because in order to do so, the alloc page will need to 
-					// find this container object.  But this container object
-					// is in the middle of being cleaned and may not be 
-					// 'found' and we will hang.
-					//
-					// Instead, just clobber the container info, which is 
-					// checksum'ed seperately from the alloc page
-					//
-                    writeRAFHeader(
-                        getIdentity(),
-                        fileData,
-								   false,  // don't create, container exists 
-								   true);  // syncfile
-
-					clearDirty();
-
-				} catch (IOException ioe) {
-
-					throw dataFactory.markCorrupt(
-                        StandardException.newException(
-                            SQLState.FILE_CONTAINER_EXCEPTION, ioe,
-                            getIdentity() != null ?
-                               getIdentity().toString() : "unknown",
-                            "clean", fileName));
-				}
-			}
-		}
+                } else if (isDirty()) {
+
+                    try {
+
+                        // Cannot get the alloc page and write it out because
+                        // in order to do so, the alloc page will need to find
+                        // this container object.  But this container object is
+                        // in the middle of being cleaned and may not be
+                        // 'found' and we will hang.
+                        //
+                        // Instead, just clobber the container info, which is
+                        // checksum'ed separately from the alloc page
+                        //
+                        writeRAFHeader(
+                            getIdentity(),
+                            fileData,
+                            false,  // don't create, container exists
+                            true);  // syncfile
+
+                        clearDirty();
+
+                    } catch (InterruptDetectedException e) {
+                        if (--maxTries > 0) {
+                            success = false;
+
+                            // Wait a bit so recovery can take place before
+                            // we re-grab monitor on "this" (which recovery
+                            // needs) and retry writeRAFHeader.
+                            try {
+                                Thread.sleep(500); // 0.5s
+                            } catch (InterruptedException ee) {
+                                // This thread received an interrupt as
+                                // well, make a note.
+                                InterruptStatus.setInterrupted();
+                            }
+
+                            continue; // retry write of RAFHeader
+                        } else {
+                            // We have tried for a minute, not sure what's
+                            // going on, so to be on safe side we can't
+                            // continue
+                            throw StandardException.newException(
+                                SQLState.FILE_IO_INTERRUPTED, e);
+                        }
+                    } catch (IOException ioe) {
+
+                        throw dataFactory.markCorrupt(
+                            StandardException.newException(
+                                SQLState.FILE_CONTAINER_EXCEPTION, ioe,
+                                getIdentity() != null ?
+                                getIdentity().toString() : "unknown",
+                                "clean", fileName));
+                    }
+                }
+            }
+        }
 	}
 
 	private void clearDirty() {
@@ -815,6 +860,7 @@ class RAFContainer extends FileContainer
         try
         {
             AccessController.doPrivileged( this);
+            currentIdentity = newIdentity;
         }
         catch( PrivilegedActionException pae){ throw (StandardException) pae.getException();}
         finally{ actionIdentity = null; }
@@ -859,7 +905,12 @@ class RAFContainer extends FileContainer
         actionIdentity = newIdentity;
         try
         {
-            return AccessController.doPrivileged( this) != null;
+            boolean success = AccessController.doPrivileged(this) != null;
+            if (success) {
+                currentIdentity = newIdentity;
+            }
+
+            return success;
         }
         catch( PrivilegedActionException pae) { 
             closeContainer();
@@ -1034,7 +1085,7 @@ class RAFContainer extends FileContainer
                         }
                         catch (InterruptedException ie)
                         {
-                            throw StandardException.interrupt(ie);
+                            InterruptStatus.setInterrupted();
                         }	
                     }
 

Modified: db/derby/code/trunk/java/engine/org/apache/derby/impl/store/raw/data/RAFContainer4.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/engine/org/apache/derby/impl/store/raw/data/RAFContainer4.java?rev=1038440&r1=1038439&r2=1038440&view=diff
==============================================================================
--- db/derby/code/trunk/java/engine/org/apache/derby/impl/store/raw/data/RAFContainer4.java (original)
+++ db/derby/code/trunk/java/engine/org/apache/derby/impl/store/raw/data/RAFContainer4.java Wed Nov 24 02:00:32 2010
@@ -27,6 +27,8 @@ import org.apache.derby.iapi.reference.S
 import org.apache.derby.iapi.services.sanity.SanityManager;
 
 import org.apache.derby.iapi.store.raw.ContainerKey;
+import org.apache.derby.iapi.util.InterruptStatus;
+import org.apache.derby.iapi.util.InterruptDetectedException;
 
 import java.io.EOFException;
 import java.io.IOException;
@@ -35,13 +37,15 @@ import java.nio.ByteBuffer;
 import java.nio.channels.FileChannel;
 import java.nio.channels.ClosedChannelException;
 import java.nio.channels.ClosedByInterruptException;
+import java.nio.channels.AsynchronousCloseException;
 import org.apache.derby.io.StorageRandomAccessFile;
 
 /**
- * RAFContainer4 overrides a few methods in RAFContainer in an attempt to use
- * FileChannel from Java 1.4's New IO framework to issue multiple IO operations
- * to the same file concurrently instead of strictly serializing IO operations
- * using a mutex on the container object.
+ * RAFContainer4 overrides a few methods in FileContainer/RAFContainer in order
+ * to use FileChannel from Java 1.4's New IO framework to issue multiple IO
+ * operations to the same file concurrently instead of strictly serializing IO
+ * operations using a mutex on the container object. Since we compile with Java
+ * 1.4, the override "annotations" are inside the method javadoc headers.
  * <p>
  * Note that our requests for multiple concurrent IOs may be serialized further
  * down in the IO stack - this is entirely up to the JVM and OS. However, at
@@ -75,11 +79,42 @@ class RAFContainer4 extends RAFContainer
      */
     private FileChannel ourChannel = null;
 
-    /**
+    private Object channelCleanupMonitor = new Object();
+
+    // channelCleanupMonitor protects next three state variables:
+
+    private Thread threadDoingRestore = null;
+
+    // volatile on threadsInPageIO, is just to ensure that we get a correct
+    // value for debugging: we can't always use channelCleanupMonitor
+    // then. Not safe on 1.4, but who cares..
+    private volatile int threadsInPageIO = 0;
+
+    // volatile on restoreChannelInProgress: corner case where we can't use
+    // channelCleanupMonitor: the corner case should not happen if NIO works as
+    // specified: that is, uniquely only one thread sees
+    // ClosedByInterruptException, always.  Unfortunately, we sometimes get
+    // AsynchronousCloseException, which another thread could theoretically
+    // also see if it were interrupted at the same time inside NIO. In this
+    // case, we could get two threads competing to do recovery. This is
+    // normally OK, unless the thread owns allocCache or "this", in which case
+    // we risk dead-lock if we synchronize on restoreChannelInProgress
+    // (explained below). So, we have to rely on volatile, which isn't safe in
+    // Java 1.4 (old memory model),
+    private volatile boolean restoreChannelInProgress = false;
+
+
+    // In case the recovering thread can't successfully recover the container,
+    // it will throw, so other waiting threads need to give up as well.  This
+    // can happen at shutdown time when interrupts are used to stop threads.
+    private boolean giveUpIO = false;
+    private final Object giveUpIOm = new Object(); // its monitor
+
+/**
      * For debugging - will be incremented when an IO is started, decremented
      * when it is done. Should be == 0 when container state is changed.
      */
-    private int iosInProgress = 0;
+    private int iosInProgress = 0; // protected by monitor on "this"
 
     public RAFContainer4(BaseDataFileFactory factory) {
         super(factory);
@@ -133,6 +168,8 @@ class RAFContainer4 extends RAFContainer
     /*
      * Wrapping methods that retrieve the FileChannel from RAFContainer's
      * fileData after calling the real methods in RAFContainer.
+     *
+     * override of RAFContainer#openContainer
      */
     synchronized boolean openContainer(ContainerKey newIdentity)
         throws StandardException
@@ -148,6 +185,9 @@ class RAFContainer4 extends RAFContainer
         return super.openContainer(newIdentity);
     }
 
+    /**
+     * override of RAFContainer#createContainer
+     */
     synchronized void createContainer(ContainerKey newIdentity)
         throws StandardException
     {
@@ -161,7 +201,9 @@ class RAFContainer4 extends RAFContainer
         super.createContainer(newIdentity);
     }
 
-
+    /**
+     * override of RAFContainer#closeContainer
+     */
     synchronized void closeContainer() {
         if (SanityManager.DEBUG) {
             // Any IOs in progress to a container being dropped will be
@@ -191,7 +233,9 @@ class RAFContainer4 extends RAFContainer
 
     /**
      *  Read a page into the supplied array.
-     *
+     *  <p/>
+     *  override of RAFContainer#readPage
+     *  <p/>
      *  <BR> MT - thread safe
      *  @exception IOException exception reading page
      *  @exception StandardException Standard Derby error policy
@@ -199,33 +243,188 @@ class RAFContainer4 extends RAFContainer
     protected void readPage(long pageNumber, byte[] pageData)
          throws IOException, StandardException
     {
-        // If this is the first alloc page, there may be another thread
-        // accessing the container information in the borrowed space on the
-        // same page. In that case, we synchronize the entire method call, just
-        // like RAFContainer.readPage() does, in order to avoid conflicts. For
-        // all other pages it is safe to skip the synchronization, since
-        // concurrent threads will access different pages and therefore don't
-        // interfere with each other.
-        if (pageNumber == FIRST_ALLOC_PAGE_NUMBER) {
-            synchronized (this) {
-                readPage0(pageNumber, pageData);
+        readPage(pageNumber, pageData, -1L);
+    }
+
+
+    /**
+     *  Read a page into the supplied array.
+     *  <p/>
+     *  override of RAFContainer#readPage
+     *  <p/>
+     *  <BR> MT - thread safe
+
+     *  @param pageNumber the page number to read data from, or -1 (called from
+     *                    getEmbryonicPage)
+     *  @param pageData  the buffer to read data into
+     *  @param offset -1 normally (not used since offset is computed from
+     *                   pageNumber), but used if pageNumber == -1
+     *                   (getEmbryonicPage)
+     *  @exception IOException exception reading page
+     *  @exception StandardException Standard Derby error policy
+     */
+    private void readPage(long pageNumber, byte[] pageData, long offset)
+         throws IOException, StandardException
+    {
+        // Interrupt recovery: If this thread holds a monitor on "this" (when
+        // RAFContainer#clean calls getEmbryonicPage via writeRAFHeader) or
+        // "allocCache" (e.g. FileContainer#newPage, #pageValid) we cannot grab
+        // channelCleanupMonitor lest another thread is one doing recovery,
+        // since the recovery thread will try to grab both those monitors
+        // during container resurrection.  So, just forge ahead in stealth mode
+        // (i.e. the recovery thread doesn't see us). If we see
+        // ClosedChannelException, throw InterruptDetectedException, so we can
+        // retry from RAFContainer ("this") or FileContainer ("allocCache")
+        // after having released the relevant monitor.
+
+        final boolean holdsThis = Thread.holdsLock(this);
+        final boolean holdsAllocCache = Thread.holdsLock(allocCache);
+
+        final boolean stealthMode = holdsThis || holdsAllocCache;
+
+        if (SanityManager.DEBUG) {
+            // getEmbryonicPage only
+            if (pageNumber == -1) {
+                SanityManager.ASSERT(holdsThis);
+            }
+            if (holdsThis) {
+                SanityManager.ASSERT(pageNumber == -1);
             }
+        }
+
+
+        if (stealthMode) {
+            // We go into stealth mode. If we see a
+            // ClosedChannelException, we will get out of here anyway,
+            // so we don't need to increment threadsInPageIO (nor can we,
+            // without risking dead-lock).
         } else {
-            readPage0(pageNumber, pageData);
+            synchronized (channelCleanupMonitor) {
+
+                // Gain entry
+                while (restoreChannelInProgress) {
+                    if (Thread.currentThread() == threadDoingRestore) {
+                        // Reopening the container will do readEmbryonicPage
+                        // (i.e. ReadPage is called recursively from
+                        // recoverContainerAfterInterrupt), so now let's make
+                        // sure we don't get stuck waiting for ourselves ;-)
+                        break;
+                    }
+
+                    try {
+                        channelCleanupMonitor.wait();
+                    } catch (InterruptedException e) {
+                        InterruptStatus.noteAndClearInterrupt(
+                            "interrupt while waiting to gain entry",
+                            threadsInPageIO,
+                            hashCode());
+                    }
+
+                }
+
+                threadsInPageIO++;
+            }
+        }
+
+
+        boolean success = false;
+        while (!success) {
+            try {
+                if (pageNumber == FIRST_ALLOC_PAGE_NUMBER) {
+                    // If this is the first alloc page, there may be another
+                    // thread accessing the container information in the
+                    // borrowed space on the same page. In that case, we
+                    // synchronize the entire method call, just like
+                    // RAFContainer.readPage() does, in order to avoid
+                    // conflicts. For all other pages it is safe to skip the
+                    // synchronization, since concurrent threads will access
+                    // different pages and therefore don't interfere with each
+                    // other:
+                    synchronized (this) {
+                        readPage0(pageNumber, pageData, offset);
+                    }
+                } else {
+                    // Normal case.
+                    readPage0(pageNumber, pageData, offset);
+                }
+
+                success = true;
+
+          //} catch (ClosedByInterruptException e) {
+          // Java NIO Bug 6979009:
+          // http://bugs.sun.com/view_bug.do?bug_id=6979009
+          // Sometimes NIO throws AsynchronousCloseException instead of
+          // ClosedByInterruptException
+            } catch (AsynchronousCloseException e) {
+                // Subsumes ClosedByInterruptException
+
+                // The interrupted thread may or may not get back here
+                // before other concurrent writers that will see
+                // ClosedChannelException, we have logic to handle that.
+                if (Thread.currentThread().isInterrupted()) {
+                    // Normal case
+                    if (recoverContainerAfterInterrupt(
+                                e.toString(),
+                                stealthMode)) {
+                        continue; // do I/O over again
+                    }
+                }
+
+
+                // Recovery is in progress, wait for another
+                // interrupted thread to clean up, i.e. act as if we
+                // had seen ClosedChannelException.
+
+                awaitRestoreChannel(e, stealthMode);
+
+            } catch (ClosedChannelException e) {
+                // We are not the thread that first saw the channel interrupt,
+                // so no recovery attempt.
+
+                // if we also have seen an interrupt, we might as well take
+                // notice now.
+                InterruptStatus.noteAndClearInterrupt(
+                    "readPage in ClosedChannelException",
+                    threadsInPageIO,
+                    hashCode());
+
+                // Recovery is in progress, wait for another interrupted thread
+                // to clean up.
+                awaitRestoreChannel(e, stealthMode);
+            }
+        }
+
+        if (stealthMode) {
+            // don't touch threadsInPageIO
+        } else {
+            synchronized (channelCleanupMonitor) {
+                threadsInPageIO--;
+            }
         }
     }
 
-    private void readPage0(long pageNumber, byte[] pageData)
+    private void readPage0(long pageNumber, byte[] pageData, long offset)
          throws IOException, StandardException
     {
         FileChannel ioChannel;
         synchronized (this) {
             if (SanityManager.DEBUG) {
-                SanityManager.ASSERT(!getCommittedDropState());
+                if (pageNumber != -1L) {
+                    SanityManager.ASSERT(!getCommittedDropState());
+                } // else: can happen from getEmbryonicPage
             }
             ioChannel = getChannel();
         }
 
+        if (SanityManager.DEBUG) {
+            if (pageNumber == -1L || pageNumber == FIRST_ALLOC_PAGE_NUMBER) {
+                // can happen from getEmbryonicPage
+                SanityManager.ASSERT(Thread.holdsLock(this));
+            } else {
+                SanityManager.ASSERT(!Thread.holdsLock(this));
+            }
+        }
+
         if(ioChannel != null) {
 
             long pageOffset = pageNumber * pageSize;
@@ -241,7 +440,18 @@ class RAFContainer4 extends RAFContainer
                     }
                 }
 
-                readFull(pageBuf, ioChannel, pageOffset);
+                if (offset == -1L) {
+                    // Normal page read doesn't specify offset,
+                    // so use one computed from page number.
+                    readFull(pageBuf, ioChannel, pageOffset);
+                } else {
+                    // getEmbryonicPage specifies its own offset, so use that
+                    if (SanityManager.DEBUG) {
+                        SanityManager.ASSERT(pageNumber == -1L);
+                    }
+
+                    readFull(pageBuf, ioChannel, offset);
+                }
             }
             finally {
                 if (SanityManager.DEBUG) {
@@ -253,7 +463,8 @@ class RAFContainer4 extends RAFContainer
             }
 
             if (dataFactory.databaseEncrypted() &&
-                pageNumber != FIRST_ALLOC_PAGE_NUMBER)
+                pageNumber != FIRST_ALLOC_PAGE_NUMBER &&
+                pageNumber != -1L /* getEmbryonicPage */)
             {
                 decryptPage(pageData, pageSize);
             }
@@ -267,7 +478,9 @@ class RAFContainer4 extends RAFContainer
 
     /**
      *  Write a page from the supplied array.
-     *
+     *  <p/>
+     *  override of RAFContainer#writePage
+     *  <p/>
      *  <BR> MT - thread safe
      *
      *  @exception StandardException Standard Derby error policy
@@ -276,22 +489,369 @@ class RAFContainer4 extends RAFContainer
     protected void writePage(long pageNumber, byte[] pageData, boolean syncPage)
          throws IOException, StandardException
     {
-        // If this is the first alloc page, there may be another thread
-        // accessing the container information in the borrowed space on the
-        // same page. In that case, we synchronize the entire method call, just
-        // like RAFContainer.writePage() does, in order to avoid conflicts. For
-        // all other pages it is safe to skip the synchronization, since
-        // concurrent threads will access different pages and therefore don't
-        // interfere with each other.
-        if (pageNumber == FIRST_ALLOC_PAGE_NUMBER) {
-            synchronized (this) {
-                writePage0(pageNumber, pageData, syncPage);
+        // Interrupt recovery: If this thread holds a monitor "allocCache"
+        // (e.g. FileContainer#newPage, #pageValid) we cannot grab
+        // channelCleanupMonitor lest another thread be the one doing recovery,
+        // since the recovery thread will try to grab both those monitors
+        // during container resurrection.  So, just forge ahead in stealth mode
+        // (i.e. the recovery thread doesn't see us). If we see
+        // ClosedChannelException, throw InterruptDetectedException, so we can
+        // retry from FileContainer ("allocCache") after having released the
+        // relevant monitor.
+        boolean stealthMode = Thread.holdsLock(allocCache);
+
+        if (SanityManager.DEBUG) {
+            SanityManager.ASSERT(!Thread.holdsLock(this));
+        }
+
+       if (stealthMode) {
+            // We go into stealth mode. If we see a
+            // ClosedChannelException, we will get out of here anyway,
+            // so we don't need to increment threadsInPageIO (nor can we,
+            // without risking deadlock).
+        } else {
+            synchronized (channelCleanupMonitor) {
+
+                // Gain entry
+                while (restoreChannelInProgress) {
+                    try {
+                        channelCleanupMonitor.wait();
+                    } catch (InterruptedException e) {
+                        InterruptStatus.noteAndClearInterrupt(
+                            "interrupt while waiting to gain entry",
+                            threadsInPageIO,
+                            hashCode());
+                    }
+
+                }
+
+                threadsInPageIO++;
+            }
+        }
+
+        boolean success = false;
+        while (!success) {
+            try {
+                if (pageNumber == FIRST_ALLOC_PAGE_NUMBER) {
+                    // If this is the first alloc page, there may be
+                    // another thread accessing the container information
+                    // in the borrowed space on the same page. In that
+                    // case, we synchronize the entire method call, just
+                    // like RAFContainer.writePage() does, in order to
+                    // avoid conflicts. For all other pages it is safe to
+                    // skip the synchronization, since concurrent threads
+                    // will access different pages and therefore don't
+                    // interfere with each other.
+                    synchronized (this) {
+                        writePage0(pageNumber, pageData, syncPage);
+                    }
+                } else {
+                    writePage0(pageNumber, pageData, syncPage);
+                }
+
+                success = true;
+
+          //} catch (ClosedByInterruptException e) {
+          // Java NIO Bug 6979009:
+          // http://bugs.sun.com/view_bug.do?bug_id=6979009
+          // Sometimes NIO throws AsynchronousCloseException instead of
+          // ClosedByInterruptException
+            } catch (AsynchronousCloseException e) {
+                // Subsumes ClosedByInterruptException
+
+                // The interrupted thread may or may not get back here
+                // before other concurrent writers that will see
+                // ClosedChannelException, we have logic to handle that.
+
+                if (Thread.currentThread().isInterrupted()) {
+                    // Normal case
+                    if (recoverContainerAfterInterrupt(
+                                e.toString(),
+                                stealthMode)) {
+                        continue; // do I/O over again
+                    }
+                }
+                // Recovery is in progress, wait for another
+                // interrupted thread to clean up, i.e. act as if we
+                // had seen ClosedChannelException.
+
+                awaitRestoreChannel(e, stealthMode);
+
+            } catch (ClosedChannelException e) {
+                // We are not the thread that first saw the channel interrupt,
+                // so no recovery attempt.
+
+                InterruptStatus.noteAndClearInterrupt(
+                    "writePage in ClosedChannelException",
+                    threadsInPageIO,
+                    hashCode());
+
+                // Recovery is in progress: we did see
+                // ClosedChannelException, so wait for another
+                // interrupted thread to clean up.
+
+                awaitRestoreChannel(e, stealthMode);
             }
+        }
+
+        if (stealthMode) {
+            // don't touch threadsInPageIO
         } else {
-            writePage0(pageNumber, pageData, syncPage);
+            synchronized (channelCleanupMonitor) {
+                threadsInPageIO--;
+            }
         }
     }
 
+    private void awaitRestoreChannel (Exception e,
+                                      boolean stealthMode)
+            throws StandardException {
+
+        if (stealthMode) {
+            // Retry handled at FileContainer or RAFContainer level
+            //
+            // This is necessary since recovery needs the monitor on allocCache
+            // or "this" to clean up, so we need to back out all the way so
+            // this thread can release the monitor to allow recovery to
+            // proceed.
+            if (SanityManager.DEBUG) {
+                    debugTrace(
+                        "thread does stealth mode retry");
+            }
+
+            synchronized (giveUpIOm) {
+                if (giveUpIO) {
+
+                    if (SanityManager.DEBUG) {
+                        debugTrace(
+                            "giving up retry, another thread gave up " +
+                            "resurrecting container ");
+                    }
+                
+                    throw StandardException.newException(
+                        SQLState.FILE_IO_INTERRUPTED);
+                }
+            }
+
+            throw new InterruptDetectedException();
+        }
+
+        synchronized (channelCleanupMonitor) {
+            threadsInPageIO--;
+        }
+
+        // Wait here till the interrupted thread does container resurrection.
+        // If we get a channel exception for some other reason, this will never
+        // happen, so throw after waiting long enough (60s).
+
+        int timesWaited = -1;
+
+        while (true) {
+            synchronized(channelCleanupMonitor) {
+                while (restoreChannelInProgress) {
+                    timesWaited++;
+
+                    if (SanityManager.DEBUG) {
+                        debugTrace(
+                            "thread needs to wait for container recovery: " +
+                            "already waited " + timesWaited + " times");
+                    }
+
+                    if (timesWaited > MAX_INTERRUPT_RETRIES) {
+                        // Max 60s, then give up, probably way too long anyway,
+                        // but doesn't hurt?
+                        throw StandardException.newException(
+                            SQLState.FILE_IO_INTERRUPTED, e);
+                    }
+
+                    try {
+                        channelCleanupMonitor.wait(INTERRUPT_RETRY_SLEEP);
+                    } catch (InterruptedException we) {
+                        InterruptStatus.setInterrupted();
+                    }
+                }
+
+                threadsInPageIO++;
+                break;
+            }
+        }
+
+        synchronized (giveUpIOm) {
+            if (giveUpIO) {
+
+                if (SanityManager.DEBUG) {
+                    debugTrace(
+                        "giving up retry, another thread gave up " +
+                        "resurrecting container ");
+                }
+
+                throw StandardException.newException(
+                    SQLState.FILE_IO_INTERRUPTED);
+            }
+        }
+
+        if (timesWaited == -1) {
+            // We have not seen restoreChannelInProgress, so we may
+            // have raced past the interrupted thread, so let's wait a
+            // bit before we attempt a new I/O.
+            try {
+                Thread.sleep(INTERRUPT_RETRY_SLEEP);
+            } catch (InterruptedException we) {
+                // This thread is getting hit, too..
+                InterruptStatus.setInterrupted();
+            }
+        }
+    }
+
+
+    /**
+     * @param whence caller site (debug info)
+     * @param stealthMode don't update threadsInPageIO if true
+     * @return true if we did it, false if we saw someone else do it and
+     * abstained
+     */
+    private boolean recoverContainerAfterInterrupt(
+        String whence,
+        boolean stealthMode) throws StandardException {
+
+        if (stealthMode && restoreChannelInProgress) {
+            // Another interrupted thread got to do the cleanup before us, so
+            // yield.
+            // This should not happen, but since
+            // we had to "fix" NIO, cf. the code marked (**), we could
+            // theoretically see two:
+            //
+            // - the thread that got AsynchronousCloseException, but was the
+            //   one that caused the channel close: it will decide (correctly)
+            //   it is the one to do recovery.
+            //
+            // - another thread that got an interrupt after doing successful IO
+            //   but seeing a closed channel: it will decide (incorrectly) it
+            //   is the one to do recovery. But since we had to fix NIO, this
+            //   case gets conflated with the case that this was *really* the
+            //   thread that caused the channel close.
+            //
+            // Not safe for Java 1.4 (only volatile protection for
+            // restoreChannelInProgress here), compare safe test below (not
+            // stealthMode).
+
+            InterruptStatus.noteAndClearInterrupt(
+                whence,
+                threadsInPageIO,
+                hashCode());
+
+            return false;
+        }
+
+        synchronized (channelCleanupMonitor) {
+            if (restoreChannelInProgress) {
+                // Another interrupted thread got to do the cleanup before us,
+                // so yield, see above explanation.
+                InterruptStatus.noteAndClearInterrupt(
+                    whence,
+                    threadsInPageIO,
+                    hashCode());
+
+                return false;
+            }
+
+            if (stealthMode) {
+                // don't touch threadsInPageIO
+            } else {
+                threadsInPageIO--;
+            }
+
+            // All new writers will now wait till we're done, see "Gain entry"
+            // in writePage above. Any concurrent threads already inside will
+            // also wait till we're done, see below
+            restoreChannelInProgress = true;
+            threadDoingRestore = Thread.currentThread();
+        }
+
+        // Wait till other concurrent threads hit the wall
+        // (ClosedChannelException) and are waiting for us to clean up, so
+        // we can set them loose when we're done.
+        while (true) {
+            synchronized (channelCleanupMonitor) {
+                if (threadsInPageIO == 0) {
+                    // Either no concurrent threads, or they are now waiting
+                    // for us to clean up (see ClosedChannelException case)
+                    break;
+                }
+            }
+
+            try {
+                Thread.sleep(10);
+            } catch (InterruptedException te) {
+                // again! No need, we have already taken note, pal!
+            }
+        }
+
+
+        // Initiate recovery
+        synchronized (channelCleanupMonitor) {
+            try {
+                InterruptStatus.noteAndClearInterrupt(
+                    whence, threadsInPageIO, hashCode());
+
+                synchronized(this) {
+                    if (SanityManager.DEBUG) {
+                        SanityManager.ASSERT(ourChannel != null,
+                                             "ourChannel is null");
+                        SanityManager.ASSERT(!ourChannel.isOpen(),
+                                             "ourChannel is open");
+                    }
+                }
+
+                while (true) {
+                    synchronized(this) {
+                        try {
+                            closeContainer();
+                            openContainer(currentIdentity);
+                        } catch (Exception newE) {
+                            // Interrupted again?
+
+                            if (InterruptStatus.noteAndClearInterrupt(
+                                        "RAF: isInterrupted during recovery",
+                                        threadsInPageIO,
+                                        hashCode())) {
+                                continue;
+                            } else {
+                                // Something else failed - shutdown happening?
+                                synchronized(giveUpIOm) {
+                                    // Make sure other threads will give up and
+                                    // throw, too.
+                                    giveUpIO = true;
+
+                                    if (SanityManager.DEBUG) {
+                                        debugTrace(
+                                            "can't resurrect container: " +
+                                            newE);
+                                    }
+                                }
+
+                                throw StandardException.newException(
+                                    SQLState.FILE_IO_INTERRUPTED, newE);
+                            }
+                        }
+                        break;
+                    }
+                }
+
+                threadsInPageIO++;
+                // retry IO
+            } finally {
+                // Recovery work done (or failed), now set other threads free
+                // to retry or give up as the case may be, cf. giveUpIO.
+                restoreChannelInProgress = false;
+                threadDoingRestore = null;
+                channelCleanupMonitor.notifyAll();
+            }
+        } // end channelCleanupMonitor region
+
+        return true;
+    }
+
     private void writePage0(long pageNumber, byte[] pageData, boolean syncPage)
          throws IOException, StandardException
     {
@@ -304,6 +864,15 @@ class RAFContainer4 extends RAFContainer
             ioChannel = getChannel();
         }
 
+        if (SanityManager.DEBUG) {
+            if (pageNumber == FIRST_ALLOC_PAGE_NUMBER) {
+                // page 0
+                SanityManager.ASSERT(Thread.currentThread().holdsLock(this));
+            } else {
+                SanityManager.ASSERT(!Thread.currentThread().holdsLock(this));
+            }
+        }
+
         if(ioChannel != null) {
             ///////////////////////////////////////////////////
             //
@@ -359,8 +928,9 @@ class RAFContainer4 extends RAFContainer
                      */
                     if (getCommittedDropState()) {
                         if (SanityManager.DEBUG) {
-                            SanityManager.DEBUG_PRINT("RAFContainer4",
-                                "Write to a dropped and closed container discarded.");
+                            debugTrace(
+                                "write to a dropped and " +
+                                "closed container discarded.");
                         }
                         return;
                     } else {
@@ -410,14 +980,17 @@ class RAFContainer4 extends RAFContainer
                 }
             }
 
-        } else { // iochannel was not initialized, fall back to original method.
+        } else {
+            // iochannel was not initialized, fall back to original method.
             super.writePage(pageNumber, pageData, syncPage);
         }
     }
 
     /**
      * Write a sequence of bytes at the given offset in a file.
-     *
+     * <p/>
+     * override of FileContainer#writeAtOffset
+     * <p/>
      * @param file the file to write to
      * @param bytes the bytes to write
      * @param offset the offset to start writing at
@@ -438,7 +1011,9 @@ class RAFContainer4 extends RAFContainer
      * Read an embryonic page (that is, a section of the first alloc page that
      * is so large that we know all the borrowed space is included in it) from
      * the specified offset in a {@code StorageRandomAccessFile}.
-     *
+     * <p/>
+     * override of FileContainer#getEmbryonicPage
+     * <p/>
      * @param file the file to read from
      * @param offset where to start reading (normally
      * {@code FileContainer.FIRST_ALLOC_PAGE_OFFSET})
@@ -451,10 +1026,9 @@ class RAFContainer4 extends RAFContainer
     {
         FileChannel ioChannel = getChannel(file);
         if (ioChannel != null) {
-            ByteBuffer buffer =
-                    ByteBuffer.allocate(AllocPage.MAX_BORROWED_SPACE);
-            readFull(buffer, ioChannel, offset);
-            return buffer.array();
+            byte[] buffer = new byte[AllocPage.MAX_BORROWED_SPACE];
+            readPage(-1L, buffer, offset);
+            return buffer;
         } else {
             return super.getEmbryonicPage(file, offset);
         }
@@ -477,17 +1051,24 @@ class RAFContainer4 extends RAFContainer
                                 long position)
             throws IOException, StandardException
     {
+        boolean beforeOpen = srcChannel.isOpen();
+        boolean beforeInterrupted = Thread.currentThread().isInterrupted();
+
         while(dstBuffer.remaining() > 0) {
-            try {
-                if (srcChannel.read(dstBuffer,
+            if (srcChannel.read(dstBuffer,
                                     position + dstBuffer.position()) == -1) {
-                        throw new EOFException(
-                            "Reached end of file while attempting to read a "
-                            + "whole page.");
-                }
-            } catch (ClosedByInterruptException e) {
-                throw StandardException.newException(
-                    SQLState.FILE_IO_INTERRUPTED, e);
+                throw new EOFException(
+                    "Reached end of file while attempting to read a "
+                    + "whole page.");
+            }
+
+            // (**) Sun Java NIO is weird: it can close the channel due to an
+            // interrupt without throwing if bytes got transferred. Compensate,
+            // so we can clean up.  Bug 6979009,
+            // http://bugs.sun.com/view_bug.do?bug_id=6979009
+            if (Thread.currentThread().isInterrupted() &&
+                    !srcChannel.isOpen()) {
+                throw new ClosedByInterruptException();
             }
         }
     }
@@ -508,14 +1089,31 @@ class RAFContainer4 extends RAFContainer
     private final void writeFull(ByteBuffer srcBuffer,
                                  FileChannel dstChannel,
                                  long position)
-            throws IOException, StandardException
+            throws IOException
     {
+        boolean beforeOpen = dstChannel.isOpen();
+        boolean beforeInterrupted = Thread.currentThread().isInterrupted();
+
         while(srcBuffer.remaining() > 0) {
-            try {
-                dstChannel.write(srcBuffer, position + srcBuffer.position());
-            } catch (ClosedByInterruptException e) {
-                throw StandardException.newException(
-                    SQLState.FILE_IO_INTERRUPTED, e);
+            dstChannel.write(srcBuffer, position + srcBuffer.position());
+
+            // (**) Sun Java NIO is weird: it can close the channel due to an
+            // interrupt without throwing if bytes got transferred. Compensate,
+            // so we can clean up. Bug 6979009,
+            // http://bugs.sun.com/view_bug.do?bug_id=6979009
+            if (Thread.currentThread().isInterrupted() &&
+                    !dstChannel.isOpen()) {
+                throw new ClosedByInterruptException();
+            }
+        }
+    }
+
+    private static void debugTrace (String msg) {
+        if (SanityManager.DEBUG) { // redundant, just to remove code in insane
+            if (SanityManager.DEBUG_ON("RAF4")) {
+                SanityManager.DEBUG_PRINT(
+                    "RAF4",
+                    Thread.currentThread().getName() + " " + msg);
             }
         }
     }

Added: db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/store/InterruptResilienceTest.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/store/InterruptResilienceTest.java?rev=1038440&view=auto
==============================================================================
--- db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/store/InterruptResilienceTest.java (added)
+++ db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/store/InterruptResilienceTest.java Wed Nov 24 02:00:32 2010
@@ -0,0 +1,180 @@
+/*
+  Class org.apache.derbyTesting.functionTests.tests.store.InterruptResilienceTest
+
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to you under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+
+*/
+package org.apache.derbyTesting.functionTests.tests.store;
+
+import org.apache.derbyTesting.junit.BaseJDBCTestCase;
+import org.apache.derbyTesting.junit.CleanDatabaseTestSetup;
+import org.apache.derbyTesting.junit.TestConfiguration;
+import org.apache.derbyTesting.junit.JDBC;
+
+import junit.framework.Test;
+import junit.framework.TestSuite;
+
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.CallableStatement;
+import java.sql.Statement;
+import java.sql.SQLException;
+import java.sql.DriverManager;
+
+/**
+ *   Test that Derby survives thread interrupts (DERBY-4741); replaces Derby151Test.
+ */
+
+public class InterruptResilienceTest extends BaseJDBCTestCase
+{
+
+    public InterruptResilienceTest(String name)
+    {
+        super(name);
+    }
+
+
+    protected static Test baseSuite(String name)
+    {
+        TestSuite suite = new TestSuite(name);
+
+        if (JDBC.vmSupportsJDBC3()) {
+            // We need a JDBC level that supports DriverManager in order
+            // to run tests that access the database from a stored procedure
+            // using DriverManager and jdbc:default:connection.
+            // DriverManager is not supported with JSR169.
+
+            suite.addTestSuite(InterruptResilienceTest.class);
+            return new CleanDatabaseTestSetup(suite);
+        } else {
+            return suite;
+        }
+    }
+
+    public static Test suite()
+    {
+        TestSuite suite = new TestSuite("InterruptResilienceTest");
+        if (! isSunJVM()) {
+            // DERBY-4463 test fails on IBM VMs. Remove this
+            // exception when that issue is solved.
+            println("Test skipped for this VM, cf. DERBY-4463");
+            return suite;
+        }
+
+        suite.addTest(
+            baseSuite("InterruptResilienceTest:embedded"));
+
+        suite.addTest(
+            TestConfiguration.clientServerDecorator(
+                baseSuite("InterruptResilienceTest:c/s")));
+
+        return suite;
+    }
+
+    protected void setUp()
+            throws java.lang.Exception {
+        super.setUp();
+
+        Statement stmt = createStatement();
+        stmt.executeUpdate("CREATE TABLE irt(x int primary key)");
+        stmt.close();
+    }
+
+    /**
+     * Clean up the connection maintained by this test.
+     */
+    protected void tearDown()
+            throws java.lang.Exception {
+
+        Statement stmt = createStatement();
+        stmt.executeUpdate("DROP TABLE irt");
+        stmt.close();
+
+        super.tearDown();
+    }
+
+    // We do the actual test inside a stored procedure so we can test this for
+    // client/server as well, otherwise we would just interrupt the client
+    // thread.
+    public static void irt() throws SQLException {
+        Connection c = DriverManager.getConnection("jdbc:default:connection");
+        c.setAutoCommit(false);
+        PreparedStatement insert = null;
+        long seen = 0;
+        long lost = 0;
+        try {
+            insert = c.prepareStatement("insert into irt values (?)");
+
+            // About 75000 iterations are needed to see any concurrency
+            // wait on RawDaemonThread during recovery, cf.
+            // running with debug flag "RAF4".
+            for (int i = 0; i < 100000; i++) {
+                if (i % 1000 == 0) {
+                    c.commit();
+                }
+
+                // Make sure to interrupt after commit, since log writing isn't
+                // safe for interrupts (on Solaris only) yet.
+                Thread.currentThread().interrupt();
+
+                insert.setInt(1, i);
+                insert.executeUpdate();
+
+                if (Thread.interrupted()) { // test and reset
+                    seen++;
+                    // println(ff() + "interrupt seen");
+                } else {
+                    // println(ff() + "interrupt lost");
+                    lost++;
+                }
+
+            }
+        } finally {
+            // always clear flag
+            Thread.interrupted();
+
+            if (insert != null) {
+                try {
+                    insert.close(); // already closed by error
+                } catch (SQLException e) {
+                }
+            }
+
+            c.close();
+            println("interrupts recovered: " + seen);
+            println("interrupts lost: " + lost + " (" +
+                    (lost * 100.0/(seen + lost)) + "%)");
+        }
+    }
+
+    public void testIRT () throws SQLException {
+        Statement s = createStatement();
+        s.executeUpdate(
+            "create procedure IRT () MODIFIES SQL DATA " +
+            "external name 'org.apache.derbyTesting.functionTests" +
+            ".tests.store.InterruptResilienceTest.irt' " +
+            "language java parameter style java");
+
+
+        s.executeUpdate("call IRT()");
+
+    }
+
+
+    // private static String ff() {
+    //     return Thread.currentThread().getName();
+    // }
+}

Propchange: db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/store/InterruptResilienceTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/store/_Suite.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/store/_Suite.java?rev=1038440&r1=1038439&r2=1038440&view=diff
==============================================================================
--- db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/store/_Suite.java (original)
+++ db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/store/_Suite.java Wed Nov 24 02:00:32 2010
@@ -57,7 +57,7 @@ public class _Suite extends BaseTestCase
         suite.addTest(StreamingColumnTest.suite());
         suite.addTest(Derby3625Test.suite());
         suite.addTest(Derby4577Test.suite());
-        suite.addTest(Derby151Test.suite());
+        suite.addTest(InterruptResilienceTest.suite());
         suite.addTest(Derby4676Test.suite());
         suite.addTest(BootLockTest.suite());
         suite.addTest(PositionedStoreStreamTest.suite());



Mime
View raw message