hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From te...@apache.org
Subject svn commit: r1175170 - in /hbase/trunk: ./ src/main/java/org/apache/hadoop/hbase/coprocessor/ src/main/java/org/apache/hadoop/hbase/master/ src/main/java/org/apache/hadoop/hbase/regionserver/ src/main/resources/ src/test/java/org/apache/hadoop/hbase/co...
Date Sat, 24 Sep 2011 14:11:06 GMT
Author: tedyu
Date: Sat Sep 24 14:11:05 2011
New Revision: 1175170

URL: http://svn.apache.org/viewvc?rev=1175170&view=rev
Log:
HBASE-4014 Coprocessors: Flag the presence of coprocessors in logged exceptions (Eugene Koontz)

Added:
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithAbort.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithRemove.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionServerCoprocessorExceptionWithAbort.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionServerCoprocessorExceptionWithRemove.java
Modified:
    hbase/trunk/CHANGES.txt
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/coprocessor/CoprocessorHost.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/RegionCoprocessorHost.java
    hbase/trunk/src/main/resources/hbase-default.xml

Modified: hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=1175170&r1=1175169&r2=1175170&view=diff
==============================================================================
--- hbase/trunk/CHANGES.txt (original)
+++ hbase/trunk/CHANGES.txt Sat Sep 24 14:11:05 2011
@@ -537,6 +537,8 @@ Release 0.92.0 - Unreleased
                next() unless the next KV is needed
                (Kannan Muthukkaruppan)
    HBASE-4280  [replication] ReplicationSink can deadlock itself via handlers
+   HBASE-4014  Coprocessors: Flag the presence of coprocessors in logged
+               exceptions (Eugene Koontz)
 
   TASKS
    HBASE-3559  Move report of split to master OFF the heartbeat channel

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/coprocessor/CoprocessorHost.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/coprocessor/CoprocessorHost.java?rev=1175170&r1=1175169&r2=1175170&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/coprocessor/CoprocessorHost.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/coprocessor/CoprocessorHost.java Sat Sep 24 14:11:05 2011
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.Coprocessor;
 import org.apache.hadoop.hbase.CoprocessorEnvironment;
+import org.apache.hadoop.hbase.DoNotRetryIOException;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.client.*;
@@ -35,6 +36,7 @@ import org.apache.hadoop.hbase.ipc.Copro
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.SortedCopyOnWriteSet;
 import org.apache.hadoop.hbase.util.VersionInfo;
+import org.apache.hadoop.hbase.Server;
 
 import java.io.File;
 import java.io.IOException;
@@ -71,6 +73,12 @@ public abstract class CoprocessorHost<E 
     pathPrefix = UUID.randomUUID().toString();
   }
 
+  private static Set<String> coprocessorNames =
+      Collections.synchronizedSet(new HashSet<String>());
+  public static Set<String> getLoadedCoprocessors() {
+      return coprocessorNames;
+  }
+
   /**
    * Load system coprocessors. Read the class names from configuration.
    * Called by constructor.
@@ -156,7 +164,7 @@ public abstract class CoprocessorHost<E 
       // load the jar and get the implementation main class
       String cp = System.getProperty("java.class.path");
       // NOTE: Path.toURL is deprecated (toURI instead) but the URLClassLoader
-      // unsuprisingly wants URLs, not URIs; so we will use the deprecated
+      // unsurprisingly wants URLs, not URIs; so we will use the deprecated
       // method which returns URLs for as long as it is available
       List<URL> paths = new ArrayList<URL>();
       paths.add(new File(dst.toString()).getCanonicalFile().toURL());
@@ -213,6 +221,9 @@ public abstract class CoprocessorHost<E 
     if (env instanceof Environment) {
       ((Environment)env).startup();
     }
+    // HBASE-4014: maintain list of loaded coprocessors for later crash analysis
+    // if server (master or regionserver) aborts.
+    coprocessorNames.add(implClass.getName());
     return env;
   }
 
@@ -576,4 +587,65 @@ public abstract class CoprocessorHost<E 
       return new HTableWrapper(tableName);
     }
   }
+
+  protected void abortServer(final String service,
+      final Server server,
+      final CoprocessorEnvironment environment,
+      final Throwable e) {
+    String coprocessorName = (environment.getInstance()).toString();
+    server.abort("Aborting service: " + service + " running on : "
+            + server.getServerName() + " because coprocessor: "
+            + coprocessorName + " threw an exception.", e);
+  }
+
+  protected void abortServer(final CoprocessorEnvironment environment,
+                             final Throwable e) {
+    String coprocessorName = (environment.getInstance()).toString();
+    LOG.error("The coprocessor: " + coprocessorName + " threw an unexpected " +
+        "exception: " + e + ", but there's no specific implementation of " +
+        " abortServer() for this coprocessor's environment.");
+  }
+
+
+  /**
+   * This is used by coprocessor hooks which are declared to throw IOException
+   * (or its subtypes). For such hooks, we should handle throwable objects
+   * depending on the Throwable's type. Those which are instances of
+   * IOException should be passed on to the client. This is in conformance with
+   * the HBase idiom regarding IOException: that it represents a circumstance
+   * that should be passed along to the client for its own handling. For
+   * example, a coprocessor that implements access controls would throw a
+   * subclass of IOException, such as AccessDeniedException, in its preGet()
+   * method to prevent an unauthorized client's performing a Get on a particular
+   * table.
+   * @param env Coprocessor Environment
+   * @param e Throwable object thrown by coprocessor.
+   * @exception IOException Exception
+   */
+  protected void handleCoprocessorThrowable(final CoprocessorEnvironment env,
+                                            final Throwable e)
+      throws IOException {
+    if (e instanceof IOException) {
+      throw (IOException)e;
+    }
+    // If we got here, e is not an IOException. A loaded coprocessor has a
+    // fatal bug, and the server (master or regionserver) should remove the
+    // faulty coprocessor from its set of active coprocessors. Setting
+    // 'hbase.coprocessor.abortonerror' to true will cause abortServer(),
+    // which may be useful in development and testing environments where
+    // 'failing fast' for error analysis is desired.
+    if (env.getConfiguration().getBoolean("hbase.coprocessor.abortonerror",false)) {
+      // server is configured to abort.
+      abortServer(env, e);
+    } else {
+      LOG.error("Removing coprocessor '" + env.toString() + "' from " +
+          "environment because it threw:  " + e,e);
+      coprocessors.remove(env);
+      throw new DoNotRetryIOException("Coprocessor: '" + env.toString() +
+          "' threw: '" + e + "' and has been removed" + "from the active " +
+          "coprocessor set.", e);
+    }
+  }
 }
+
+

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1175170&r1=1175169&r2=1175170&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Sat Sep 24 14:11:05 2011
@@ -56,6 +56,7 @@ import org.apache.hadoop.hbase.client.Ge
 import org.apache.hadoop.hbase.client.MetaScanner;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
 import org.apache.hadoop.hbase.executor.ExecutorService;
 import org.apache.hadoop.hbase.executor.ExecutorService.ExecutorType;
 import org.apache.hadoop.hbase.ipc.HBaseRPC;
@@ -1187,8 +1188,25 @@ implements HMasterInterface, HMasterRegi
     return fileSystemManager.getClusterId();
   }
 
+  /**
+   * The set of loaded coprocessors is stored in a static set. Since it's
+   * statically allocated, it does not require that HMaster's cpHost be
+   * initialized prior to accessing it.
+   * @return a String representation of the set of names of the loaded
+   * coprocessors.
+   */
+  public static String getLoadedCoprocessors() {
+    return CoprocessorHost.getLoadedCoprocessors().toString();
+  }
+
   @Override
   public void abort(final String msg, final Throwable t) {
+    if (cpHost != null) {
+      // HBASE-4014: dump a list of loaded coprocessors.
+      LOG.fatal("Master server abort: loaded coprocessors are: " +
+          getLoadedCoprocessors());
+    }
+
     if (abortNow(msg, t)) {
       if (t != null) LOG.fatal(msg, t);
       else LOG.fatal(msg);

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java?rev=1175170&r1=1175169&r2=1175170&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java Sat Sep 24 14:11:05 2011
@@ -20,6 +20,8 @@
 
 package org.apache.hadoop.hbase.master;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.*;
 import org.apache.hadoop.hbase.coprocessor.*;
@@ -34,6 +36,8 @@ import java.io.IOException;
 public class MasterCoprocessorHost
     extends CoprocessorHost<MasterCoprocessorHost.MasterEnvironment> {
 
+  private static final Log LOG = LogFactory.getLog(MasterCoprocessorHost.class);
+
   /**
    * Coprocessor environment extension providing access to master related
    * services.
@@ -69,6 +73,11 @@ public class MasterCoprocessorHost
         masterServices);
   }
 
+  @Override
+  protected void abortServer(final CoprocessorEnvironment env, final Throwable e) {
+    abortServer("master", masterServices, env, e);
+  }
+
   /* Implementation of hooks for invoking MasterObservers */
   void preCreateTable(HTableDescriptor htd, HRegionInfo[] regions)
     throws IOException {
@@ -76,7 +85,11 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).preCreateTable(ctx, htd, regions);
+        try {
+          ((MasterObserver)env.getInstance()).preCreateTable(ctx, htd, regions);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -90,7 +103,11 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).postCreateTable(ctx, htd, regions);
+        try {
+            ((MasterObserver)env.getInstance()).postCreateTable(ctx, htd, regions);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -103,7 +120,11 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).preDeleteTable(ctx, tableName);
+        try {
+          ((MasterObserver)env.getInstance()).preDeleteTable(ctx, tableName);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -116,7 +137,11 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).postDeleteTable(ctx, tableName);
+        try {
+          ((MasterObserver)env.getInstance()).postDeleteTable(ctx, tableName);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -130,7 +155,12 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).preModifyTable(ctx, tableName, htd);
+        try {
+          ((MasterObserver)env.getInstance()).preModifyTable(ctx, tableName,
+              htd);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -144,7 +174,12 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).postModifyTable(ctx, tableName, htd);
+        try {
+          ((MasterObserver)env.getInstance()).postModifyTable(ctx, tableName,
+              htd);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -159,7 +194,11 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).preAddColumn(ctx, tableName, column);
+        try {
+          ((MasterObserver)env.getInstance()).preAddColumn(ctx, tableName, column);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         bypass |= ctx.shouldBypass();
         if (ctx.shouldComplete()) {
           break;
@@ -175,7 +214,12 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).postAddColumn(ctx, tableName, column);
+        try {
+          ((MasterObserver)env.getInstance()).postAddColumn(ctx, tableName,
+              column);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -190,8 +234,12 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).preModifyColumn(
+        try {
+          ((MasterObserver)env.getInstance()).preModifyColumn(
             ctx, tableName, descriptor);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         bypass |= ctx.shouldBypass();
         if (ctx.shouldComplete()) {
           break;
@@ -207,8 +255,12 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).postModifyColumn(
-            ctx, tableName, descriptor);
+        try {
+          ((MasterObserver)env.getInstance()).postModifyColumn(
+              ctx, tableName, descriptor);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -223,7 +275,11 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).preDeleteColumn(ctx, tableName, c);
+        try {
+          ((MasterObserver)env.getInstance()).preDeleteColumn(ctx, tableName, c);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         bypass |= ctx.shouldBypass();
         if (ctx.shouldComplete()) {
           break;
@@ -239,7 +295,12 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).postDeleteColumn(ctx, tableName, c);
+        try {
+          ((MasterObserver)env.getInstance()).postDeleteColumn(ctx, tableName,
+              c);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -252,7 +313,11 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).preEnableTable(ctx, tableName);
+        try {
+          ((MasterObserver)env.getInstance()).preEnableTable(ctx, tableName);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -265,7 +330,11 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).postEnableTable(ctx, tableName);
+        try {
+          ((MasterObserver)env.getInstance()).postEnableTable(ctx, tableName);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -278,7 +347,11 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).preDisableTable(ctx, tableName);
+        try {
+          ((MasterObserver)env.getInstance()).preDisableTable(ctx, tableName);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -291,7 +364,11 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).postDisableTable(ctx, tableName);
+        try {
+          ((MasterObserver)env.getInstance()).postDisableTable(ctx, tableName);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -306,8 +383,12 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).preMove(
-            ctx, region, srcServer, destServer);
+        try {
+          ((MasterObserver)env.getInstance()).preMove(
+              ctx, region, srcServer, destServer);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         bypass |= ctx.shouldBypass();
         if (ctx.shouldComplete()) {
           break;
@@ -323,8 +404,12 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).postMove(
-            ctx, region, srcServer, destServer);
+        try {
+          ((MasterObserver)env.getInstance()).postMove(
+              ctx, region, srcServer, destServer);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -338,7 +423,11 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver) env.getInstance()).preAssign(ctx, regionInfo);
+        try {
+          ((MasterObserver) env.getInstance()).preAssign(ctx, regionInfo);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         bypass |= ctx.shouldBypass();
         if (ctx.shouldComplete()) {
           break;
@@ -353,7 +442,11 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver) env.getInstance()).postAssign(ctx, regionInfo);
+        try {
+          ((MasterObserver)env.getInstance()).postAssign(ctx, regionInfo);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -368,8 +461,12 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).preUnassign(
-            ctx, regionInfo, force);
+        try {
+          ((MasterObserver)env.getInstance()).preUnassign(
+              ctx, regionInfo, force);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         bypass |= ctx.shouldBypass();
         if (ctx.shouldComplete()) {
           break;
@@ -385,8 +482,12 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).postUnassign(
-            ctx, regionInfo, force);
+        try {
+          ((MasterObserver)env.getInstance()).postUnassign(
+              ctx, regionInfo, force);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -400,7 +501,11 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).preBalance(ctx);
+        try {
+          ((MasterObserver)env.getInstance()).preBalance(ctx);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         bypass |= ctx.shouldBypass();
         if (ctx.shouldComplete()) {
           break;
@@ -415,7 +520,11 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).postBalance(ctx);
+        try {
+          ((MasterObserver)env.getInstance()).postBalance(ctx);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -429,8 +538,12 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        balance = ((MasterObserver)env.getInstance()).preBalanceSwitch(
-            ctx, balance);
+        try {
+          balance = ((MasterObserver)env.getInstance()).preBalanceSwitch(
+              ctx, balance);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -445,8 +558,12 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).postBalanceSwitch(
-            ctx, oldValue, newValue);
+        try {
+          ((MasterObserver)env.getInstance()).postBalanceSwitch(
+              ctx, oldValue, newValue);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -459,7 +576,11 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).preShutdown(ctx);
+        try {
+          ((MasterObserver)env.getInstance()).preShutdown(ctx);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -472,7 +593,11 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).preStopMaster(ctx);
+        try {
+          ((MasterObserver)env.getInstance()).preStopMaster(ctx);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -485,7 +610,11 @@ public class MasterCoprocessorHost
     for (MasterEnvironment env: coprocessors) {
       if (env.getInstance() instanceof MasterObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((MasterObserver)env.getInstance()).postStartMaster(ctx);
+        try {
+          ((MasterObserver)env.getInstance()).postStartMaster(ctx);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1175170&r1=1175169&r2=1175170&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Sat Sep 24 14:11:05 2011
@@ -92,6 +92,7 @@ import org.apache.hadoop.hbase.client.Ro
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.client.coprocessor.Exec;
 import org.apache.hadoop.hbase.client.coprocessor.ExecResult;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
 import org.apache.hadoop.hbase.executor.ExecutorService;
 import org.apache.hadoop.hbase.executor.ExecutorService.ExecutorType;
 import org.apache.hadoop.hbase.filter.BinaryComparator;
@@ -617,7 +618,7 @@ public class HRegionServer implements HR
 
     try {
       // Try and register with the Master; tell it we are here.  Break if
-      // server is stopped or the clusterup flag is down of hdfs went wacky.
+      // server is stopped or the clusterup flag is down or hdfs went wacky.
       while (keepLooping()) {
         MapWritable w = reportForDuty();
         if (w == null) {
@@ -1506,6 +1507,11 @@ public class HRegionServer implements HR
     }
     this.abortRequested = true;
     this.reservedSpace.clear();
+    // HBASE-4014: show list of coprocessors that were loaded to help debug
+    // regionserver crashes. Note that we're implicitly using
+    // java.util.HashSet's toString() method to print the coprocessor names.
+    LOG.fatal("RegionServer abort: loaded coprocessors are: " +
+        CoprocessorHost.getLoadedCoprocessors());
     if (this.metrics != null) {
       LOG.info("Dump of metrics: " + this.metrics);
     }

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/RegionCoprocessorHost.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/RegionCoprocessorHost.java?rev=1175170&r1=1175169&r2=1175170&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/RegionCoprocessorHost.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/RegionCoprocessorHost.java Sat Sep 24 14:11:05 2011
@@ -31,6 +31,7 @@ import org.apache.hadoop.hbase.HRegionIn
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.Coprocessor;
+import org.apache.hadoop.hbase.CoprocessorEnvironment;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.client.*;
 import org.apache.hadoop.hbase.coprocessor.*;
@@ -196,6 +197,33 @@ public class RegionCoprocessorHost
         rsServices);
   }
 
+  @Override
+  protected void abortServer(final CoprocessorEnvironment env, final Throwable e) {
+    abortServer("regionserver", rsServices, env, e);
+  }
+
+  /**
+   * HBASE-4014 : This is used by coprocessor hooks which are not declared to throw exceptions.
+   *
+   * For example, {@link
+   * org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost#preOpen()} and
+   * {@link org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost#postOpen()} are such hooks.
+   *
+   * See also {@link org.apache.hadoop.hbase.coprocessor.CoprocessorHost#handleCoprocessorThrowable()}
+   * @param env: The coprocessor that threw the exception.
+   * @param e: The exception that was thrown.
+   */
+  private void handleCoprocessorThrowableNoRethrow(
+      final CoprocessorEnvironment env, final Throwable e) {
+    try {
+      handleCoprocessorThrowable(env,e);
+    } catch (IOException ioe) {
+      // We cannot throw exceptions from the caller hook, so ignore.
+      LOG.warn("handleCoprocessorThrowable() threw an IOException while attempting to handle Throwable " + e
+        + ". Ignoring.",e);
+    }
+  }
+
   /**
    * Invoked before a region open
    */
@@ -204,7 +232,11 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).preOpen(ctx);
+         try {
+          ((RegionObserver)env.getInstance()).preOpen(ctx);
+         } catch (Throwable e) {
+           handleCoprocessorThrowableNoRethrow(env, e);
+         }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -220,7 +252,11 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).postOpen(ctx);
+        try {
+          ((RegionObserver)env.getInstance()).postOpen(ctx);
+        } catch (Throwable e) {
+          handleCoprocessorThrowableNoRethrow(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -237,7 +273,11 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).preClose(ctx, abortRequested);
+        try {
+          ((RegionObserver)env.getInstance()).preClose(ctx, abortRequested);
+        } catch (Throwable e) {
+          handleCoprocessorThrowableNoRethrow(env, e);
+        }
       }
     }
   }
@@ -251,7 +291,12 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).postClose(ctx, abortRequested);
+        try {
+          ((RegionObserver)env.getInstance()).postClose(ctx, abortRequested);
+        } catch (Throwable e) {
+          handleCoprocessorThrowableNoRethrow(env, e);
+        }
+
       }
       shutdown(env);
     }
@@ -293,8 +338,12 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).postCompactSelection(
-            ctx, store, selected);
+        try {
+          ((RegionObserver)env.getInstance()).postCompactSelection(
+              ctx, store, selected);
+        } catch (Throwable e) {
+          handleCoprocessorThrowableNoRethrow(env,e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -313,8 +362,12 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        scanner = ((RegionObserver)env.getInstance()).preCompact(
-            ctx, store, scanner);
+        try {
+          scanner = ((RegionObserver)env.getInstance()).preCompact(
+              ctx, store, scanner);
+        } catch (Throwable e) {
+          handleCoprocessorThrowableNoRethrow(env,e);
+        }
         bypass |= ctx.shouldBypass();
         if (ctx.shouldComplete()) {
           break;
@@ -334,7 +387,11 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).postCompact(ctx, store, resultFile);
+        try {
+          ((RegionObserver)env.getInstance()).postCompact(ctx, store, resultFile);
+        } catch (Throwable e) {
+          handleCoprocessorThrowableNoRethrow(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -350,7 +407,11 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).preFlush(ctx);
+        try {
+          ((RegionObserver)env.getInstance()).preFlush(ctx);
+        } catch (Throwable e) {
+          handleCoprocessorThrowableNoRethrow(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -366,7 +427,11 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).postFlush(ctx);
+        try {
+          ((RegionObserver)env.getInstance()).postFlush(ctx);
+        } catch (Throwable e) {
+          handleCoprocessorThrowableNoRethrow(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -382,7 +447,11 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).preSplit(ctx);
+        try {
+          ((RegionObserver)env.getInstance()).preSplit(ctx);
+        } catch (Throwable e) {
+          handleCoprocessorThrowableNoRethrow(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -400,7 +469,11 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).postSplit(ctx, l, r);
+        try {
+          ((RegionObserver)env.getInstance()).postSplit(ctx, l, r);
+        } catch (Throwable e) {
+          handleCoprocessorThrowableNoRethrow(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -424,8 +497,12 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).preGetClosestRowBefore(ctx, row, family,
-          result);
+        try {
+          ((RegionObserver)env.getInstance()).preGetClosestRowBefore(ctx, row,
+              family, result);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         bypass |= ctx.shouldBypass();
         if (ctx.shouldComplete()) {
           break;
@@ -447,8 +524,12 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).postGetClosestRowBefore(ctx, row, family,
-          result);
+        try {
+          ((RegionObserver)env.getInstance()).postGetClosestRowBefore(ctx, row,
+              family, result);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -468,7 +549,11 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).preGet(ctx, get, results);
+        try {
+          ((RegionObserver)env.getInstance()).preGet(ctx, get, results);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         bypass |= ctx.shouldBypass();
         if (ctx.shouldComplete()) {
           break;
@@ -490,7 +575,11 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).postGet(ctx, get, results);
+        try {
+          ((RegionObserver)env.getInstance()).postGet(ctx, get, results);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -511,7 +600,11 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        exists = ((RegionObserver)env.getInstance()).preExists(ctx, get, exists);
+        try {
+          exists = ((RegionObserver)env.getInstance()).preExists(ctx, get, exists);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         bypass |= ctx.shouldBypass();
         if (ctx.shouldComplete()) {
           break;
@@ -533,7 +626,11 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        exists = ((RegionObserver)env.getInstance()).postExists(ctx, get, exists);
+        try {
+          exists = ((RegionObserver)env.getInstance()).postExists(ctx, get, exists);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -555,7 +652,11 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).prePut(ctx, familyMap, writeToWAL);
+        try {
+          ((RegionObserver)env.getInstance()).prePut(ctx, familyMap, writeToWAL);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         bypass |= ctx.shouldBypass();
         if (ctx.shouldComplete()) {
           break;
@@ -576,7 +677,11 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).postPut(ctx, familyMap, writeToWAL);
+        try {
+          ((RegionObserver)env.getInstance()).postPut(ctx, familyMap, writeToWAL);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -597,7 +702,11 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).preDelete(ctx, familyMap, writeToWAL);
+        try {
+          ((RegionObserver)env.getInstance()).preDelete(ctx, familyMap, writeToWAL);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         bypass |= ctx.shouldBypass();
         if (ctx.shouldComplete()) {
           break;
@@ -618,7 +727,11 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).postDelete(ctx, familyMap, writeToWAL);
+        try {
+          ((RegionObserver)env.getInstance()).postDelete(ctx, familyMap, writeToWAL);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -647,8 +760,14 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        result = ((RegionObserver)env.getInstance()).preCheckAndPut(ctx, row, family,
-          qualifier, compareOp, comparator, put, result);
+        try {
+          result = ((RegionObserver)env.getInstance()).preCheckAndPut(ctx, row, family,
+            qualifier, compareOp, comparator, put, result);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
+
+
         bypass |= ctx.shouldBypass();
         if (ctx.shouldComplete()) {
           break;
@@ -676,8 +795,12 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        result = ((RegionObserver)env.getInstance()).postCheckAndPut(ctx, row,
-          family, qualifier, compareOp, comparator, put, result);
+        try {
+          result = ((RegionObserver)env.getInstance()).postCheckAndPut(ctx, row,
+            family, qualifier, compareOp, comparator, put, result);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -707,8 +830,12 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        result = ((RegionObserver)env.getInstance()).preCheckAndDelete(ctx, row,
-          family, qualifier, compareOp, comparator, delete, result);
+        try {
+          result = ((RegionObserver)env.getInstance()).preCheckAndDelete(ctx, row,
+            family, qualifier, compareOp, comparator, delete, result);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         bypass |= ctx.shouldBypass();
         if (ctx.shouldComplete()) {
           break;
@@ -736,9 +863,13 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        result = ((RegionObserver)env.getInstance())
-          .postCheckAndDelete(ctx, row, family, qualifier, compareOp,
-            comparator, delete, result);
+        try {
+          result = ((RegionObserver)env.getInstance())
+            .postCheckAndDelete(ctx, row, family, qualifier, compareOp,
+              comparator, delete, result);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -765,8 +896,12 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        amount = ((RegionObserver)env.getInstance()).preIncrementColumnValue(ctx,
-            row, family, qualifier, amount, writeToWAL);
+        try {
+          amount = ((RegionObserver)env.getInstance()).preIncrementColumnValue(ctx,
+              row, family, qualifier, amount, writeToWAL);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         bypass |= ctx.shouldBypass();
         if (ctx.shouldComplete()) {
           break;
@@ -793,8 +928,12 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        result = ((RegionObserver)env.getInstance()).postIncrementColumnValue(ctx,
-            row, family, qualifier, amount, writeToWAL, result);
+        try {
+          result = ((RegionObserver)env.getInstance()).postIncrementColumnValue(ctx,
+              row, family, qualifier, amount, writeToWAL, result);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -817,7 +956,11 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).preIncrement(ctx, increment, result);
+        try {
+          ((RegionObserver)env.getInstance()).preIncrement(ctx, increment, result);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         bypass |= ctx.shouldBypass();
         if (ctx.shouldComplete()) {
           break;
@@ -838,7 +981,11 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).postIncrement(ctx, increment, result);
+        try {
+          ((RegionObserver)env.getInstance()).postIncrement(ctx, increment, result);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -859,7 +1006,11 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        s = ((RegionObserver)env.getInstance()).preScannerOpen(ctx, scan, s);
+        try {
+          s = ((RegionObserver)env.getInstance()).preScannerOpen(ctx, scan, s);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         bypass |= ctx.shouldBypass();
         if (ctx.shouldComplete()) {
           break;
@@ -881,7 +1032,11 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        s = ((RegionObserver)env.getInstance()).postScannerOpen(ctx, scan, s);
+        try {
+          s = ((RegionObserver)env.getInstance()).postScannerOpen(ctx, scan, s);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -906,8 +1061,12 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        hasNext = ((RegionObserver)env.getInstance()).preScannerNext(ctx, s, results,
-          limit, hasNext);
+        try {
+          hasNext = ((RegionObserver)env.getInstance()).preScannerNext(ctx, s, results,
+            limit, hasNext);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         bypass |= ctx.shouldBypass();
         if (ctx.shouldComplete()) {
           break;
@@ -932,8 +1091,12 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        hasMore = ((RegionObserver)env.getInstance()).postScannerNext(ctx, s,
-          results, limit, hasMore);
+        try {
+          hasMore = ((RegionObserver)env.getInstance()).postScannerNext(ctx, s,
+            results, limit, hasMore);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -954,7 +1117,11 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).preScannerClose(ctx, s);
+        try {
+          ((RegionObserver)env.getInstance()).preScannerClose(ctx, s);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         bypass |= ctx.shouldBypass();
         if (ctx.shouldComplete()) {
           break;
@@ -974,7 +1141,11 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).postScannerClose(ctx, s);
+        try {
+          ((RegionObserver)env.getInstance()).postScannerClose(ctx, s);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
@@ -996,14 +1167,17 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).preWALRestore(ctx, info, logKey,
-            logEdit);
+        try {
+          ((RegionObserver)env.getInstance()).preWALRestore(ctx, info, logKey,
+              logEdit);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         bypass |= ctx.shouldBypass();
         if (ctx.shouldComplete()) {
           break;
         }
       }
-     
     }
     return bypass;
   }
@@ -1020,13 +1194,16 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).postWALRestore(ctx, info,
-            logKey, logEdit);
+        try {
+          ((RegionObserver)env.getInstance()).postWALRestore(ctx, info,
+              logKey, logEdit);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
       }
-      
     }
   }
 }

Modified: hbase/trunk/src/main/resources/hbase-default.xml
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/resources/hbase-default.xml?rev=1175170&r1=1175169&r2=1175170&view=diff
==============================================================================
--- hbase/trunk/src/main/resources/hbase-default.xml (original)
+++ hbase/trunk/src/main/resources/hbase-default.xml Sat Sep 24 14:11:05 2011
@@ -725,4 +725,15 @@
     version is X.X.X-SNAPSHOT"
     </description>
   </property>
+  <property>
+      <name>hbase.coprocessor.abortonerror</name>
+      <value>false</value>
+      <description>
+      Set to true to cause the hosting server (master or regionserver) to
+      abort if a coprocessor throws a Throwable object that is not IOException or
+      a subclass of IOException. Setting it to true might be useful in development
+      environments where one wants to terminate the server as soon as possible to
+      simplify coprocessor failure analysis.
+      </description>
+  </property>
 </configuration>

Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithAbort.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithAbort.java?rev=1175170&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithAbort.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithAbort.java Sat Sep 24 14:11:05 2011
@@ -0,0 +1,228 @@
+/*
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.coprocessor;
+
+import java.io.IOException;
+import java.io.InterruptedIOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Abortable;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.CoprocessorEnvironment;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperNodeTracker;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+/**
+ * Tests unhandled exceptions thrown by coprocessors running on master.
+ * Expected result is that the master will abort with an informative
+ * error message describing the set of its loaded coprocessors for crash diagnosis.
+ * (HBASE-4014).
+ */
+public class TestMasterCoprocessorExceptionWithAbort {
+
+  public static class MasterTracker extends ZooKeeperNodeTracker {
+    public volatile boolean masterZKNodeWasDeleted = false; // polled unsynchronized by the test
+
+    public MasterTracker(ZooKeeperWatcher zkw, String masterNode, Abortable abortable) {
+      super(zkw, masterNode, abortable);
+    }
+
+    @Override
+    public synchronized void nodeDeleted(String path) {
+      if (path.equals("/hbase/master")) { // only the master znode's deletion is recorded
+        masterZKNodeWasDeleted = true;
+      }
+    }
+  }
+
+  public static class CreateTableThread extends Thread {
+    final HBaseTestingUtility UTIL;
+    public CreateTableThread(HBaseTestingUtility UTIL) {
+      this.UTIL = UTIL;
+    }
+
+    @Override
+    public void run() {
+      // Create a table: the master coprocessor throws an exception and does not catch it.
+      HTableDescriptor htd = new HTableDescriptor(TEST_TABLE);
+      htd.addFamily(new HColumnDescriptor(TEST_FAMILY));
+      try {
+        HBaseAdmin admin = UTIL.getHBaseAdmin();
+        admin.createTable(htd);
+        fail("BuggyMasterObserver failed to throw an exception.");
+      } catch (IOException e) {
+        // JUnit's assertEquals takes (message, expected, actual), in that order.
+        assertEquals("HBaseAdmin threw an interrupted IOException as expected.",
+            "java.io.InterruptedIOException", e.getClass().getName());
+      }
+    }
+  }
+
+  public static class BuggyMasterObserver extends BaseMasterObserver {
+    private boolean preCreateTableCalled; // NOTE(review): never assigned in this class
+    private boolean postCreateTableCalled; // NOTE(review): never assigned in this class
+    private boolean startCalled; // set by start()
+    private boolean postStartMasterCalled; // set by postStartMaster()
+
+    @Override
+    public void postCreateTable(ObserverContext<MasterCoprocessorEnvironment> env,
+        HTableDescriptor desc, HRegionInfo[] regions) throws IOException {
+      // cause a NullPointerException and don't catch it: this will cause the
+      // master to abort().
+      Integer i;
+      i = null;
+      i = i++; // auto-unboxing the null Integer is what throws the NullPointerException
+    }
+
+    public boolean wasCreateTableCalled() {
+      return preCreateTableCalled && postCreateTableCalled; // NOTE(review): both flags stay false
+    }
+
+    @Override
+    public void postStartMaster(ObserverContext<MasterCoprocessorEnvironment> ctx)
+        throws IOException {
+      postStartMasterCalled = true;
+    }
+
+    public boolean wasStartMasterCalled() {
+      return postStartMasterCalled;
+    }
+
+    @Override
+    public void start(CoprocessorEnvironment env) throws IOException {
+      startCalled = true;
+    }
+
+    public boolean wasStarted() {
+      return startCalled;
+    }
+  }
+
+  private static HBaseTestingUtility UTIL = new HBaseTestingUtility();
+  private static byte[] TEST_TABLE = Bytes.toBytes("observed_table");
+  private static byte[] TEST_FAMILY = Bytes.toBytes("fam1");
+
+  @BeforeClass
+  public static void setupBeforeClass() throws Exception {
+    Configuration conf = UTIL.getConfiguration();
+    conf.set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
+        BuggyMasterObserver.class.getName()); // register the buggy observer under the master key
+    conf.set("hbase.coprocessor.abortonerror", "true"); // abort the host on coprocessor errors
+    UTIL.startMiniCluster(2);
+  }
+
+  @AfterClass
+  public static void teardownAfterClass() throws Exception {
+    UTIL.shutdownMiniCluster(); // stops the mini cluster started in setupBeforeClass()
+  }
+
+  @Test(timeout=30000)
+  public void testExceptionFromCoprocessorWhenCreatingTable()
+      throws IOException {
+    MiniHBaseCluster cluster = UTIL.getHBaseCluster();
+
+    HMaster master = cluster.getMaster();
+    MasterCoprocessorHost host = master.getCoprocessorHost();
+    BuggyMasterObserver cp = (BuggyMasterObserver)host.findCoprocessor(
+        BuggyMasterObserver.class.getName());
+    assertFalse("No table created yet", cp.wasCreateTableCalled());
+
+    // set a watch on the zookeeper /hbase/master node. If the master dies,
+    // the node will be deleted.
+    ZooKeeperWatcher zkw = new ZooKeeperWatcher(UTIL.getConfiguration(),
+      "unittest", new Abortable() {
+      @Override
+      public void abort(String why, Throwable e) {
+        throw new RuntimeException("Fatal ZK error: " + why, e);
+      }
+      @Override
+      public boolean isAborted() {
+        return false;
+      }
+    });
+
+    MasterTracker masterTracker = new MasterTracker(zkw,"/hbase/master",
+        new Abortable() {
+          @Override
+          public void abort(String why, Throwable e) {
+            throw new RuntimeException("Fatal ZK master tracker error, why=", e);
+          }
+          @Override
+          public boolean isAborted() {
+            return false;
+          }
+        });
+
+    masterTracker.start();
+    zkw.registerListener(masterTracker);
+
+    // Test (part of the) output that should have be printed by master when it aborts:
+    // (namely the part that shows the set of loaded coprocessors).
+    // In this test, there is only a single coprocessor (BuggyMasterObserver).
+    assertTrue(master.getLoadedCoprocessors().
+      equals("[" +
+          TestMasterCoprocessorExceptionWithAbort.BuggyMasterObserver.class.getName() +
+          "]"));
+
+    CreateTableThread createTableThread = new CreateTableThread(UTIL);
+
+    // Attempting to create a table (using createTableThread above) triggers an NPE in BuggyMasterObserver.
+    // Master will then abort and the /hbase/master zk node will be deleted.
+    createTableThread.start();
+
+    // Wait up to 30 seconds for master's /hbase/master zk node to go away after master aborts.
+    for (int i = 0; i < 30; i++) {
+      if (masterTracker.masterZKNodeWasDeleted == true) {
+        break;
+      }
+      try {
+        Thread.sleep(1000);
+      } catch (InterruptedException e) {
+        fail("InterruptedException while waiting for master zk node to "
+            + "be deleted.");
+      }
+    }
+
+    assertTrue("Master aborted on coprocessor exception, as expected.",
+        masterTracker.masterZKNodeWasDeleted);
+
+    createTableThread.interrupt();
+    try {
+      createTableThread.join(1000);
+    } catch (InterruptedException e) {
+      assertTrue("Ignoring InterruptedException while waiting for " +
+          " createTableThread.join().", true);
+    }
+  }
+
+}

Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithRemove.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithRemove.java?rev=1175170&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithRemove.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithRemove.java Sat Sep 24 14:11:05 2011
@@ -0,0 +1,221 @@
+/*
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.coprocessor;
+
+import java.io.IOException;
+import java.io.InterruptedIOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Abortable;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.CoprocessorEnvironment;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperNodeTracker;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+/**
+ * Tests unhandled exceptions thrown by coprocessors running on master.
+ * Expected result is that the master will remove the buggy coprocessor from
+ * its set of coprocessors and throw an org.apache.hadoop.hbase.DoNotRetryIOException
+ * back to the client.
+ * (HBASE-4014).
+ */
+public class TestMasterCoprocessorExceptionWithRemove {
+
+  /**
+   * ZooKeeper node tracker that records whether the master's znode was deleted.
+   * The test reads {@link #masterZKNodeWasDeleted} to decide whether the master died.
+   */
+  public static class MasterTracker extends ZooKeeperNodeTracker {
+    /**
+     * Becomes true once a deletion of "/hbase/master" is reported. Declared
+     * volatile because it is written inside the synchronized
+     * {@link #nodeDeleted(String)} but read directly, without that lock, by
+     * the test method (presumably from a different thread than the writer).
+     */
+    public volatile boolean masterZKNodeWasDeleted = false;
+
+    public MasterTracker(ZooKeeperWatcher zkw, String masterNode, Abortable abortable) {
+      super(zkw, masterNode, abortable);
+    }
+
+    /**
+     * Records the deletion when the deleted path is the master znode.
+     *
+     * @param path path of the znode that was deleted
+     */
+    @Override
+    public synchronized void nodeDeleted(String path) {
+      if (path.equals("/hbase/master")) {
+        masterZKNodeWasDeleted = true;
+      }
+    }
+  }
+
+  /**
+   * Master observer whose postCreateTable() hook always fails with an
+   * unhandled NullPointerException. It also remembers whether start() and
+   * postStartMaster() were invoked.
+   */
+  public static class BuggyMasterObserver extends BaseMasterObserver {
+    private boolean startCalled;
+    private boolean postStartMasterCalled;
+    // Neither of the two flags below is ever assigned in this class, so
+    // wasCreateTableCalled() always reports false.
+    private boolean preCreateTableCalled;
+    private boolean postCreateTableCalled;
+
+    @Override
+    public void start(CoprocessorEnvironment env) throws IOException {
+      startCalled = true;
+    }
+
+    @Override
+    public void postStartMaster(ObserverContext<MasterCoprocessorEnvironment> ctx)
+        throws IOException {
+      postStartMasterCalled = true;
+    }
+
+    @Override
+    public void postCreateTable(ObserverContext<MasterCoprocessorEnvironment> env,
+        HTableDescriptor desc, HRegionInfo[] regions) throws IOException {
+      // Deliberately unbox a null Integer: the resulting NullPointerException
+      // is left uncaught so that the master has to deal with it and report an
+      // o.apache.hadoop.hbase.DoNotRetryIOException to the client.
+      Integer missing = null;
+      missing++;
+    }
+
+    public boolean wasStarted() {
+      return startCalled;
+    }
+
+    public boolean wasStartMasterCalled() {
+      return postStartMasterCalled;
+    }
+
+    public boolean wasCreateTableCalled() {
+      if (!preCreateTableCalled) {
+        return false;
+      }
+      return postCreateTableCalled;
+    }
+  }
+
+  // Shared mini-cluster harness; started in setupBeforeClass() and shut down
+  // in teardownAfterClass().
+  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
+  // First table the test creates; that creation is expected to fail because
+  // of the buggy coprocessor.
+  private static final byte[] TEST_TABLE1 = Bytes.toBytes("observed_table1");
+  private static final byte[] TEST_FAMILY1 = Bytes.toBytes("fam1");
+
+  // Second table, created after the failure; that creation is expected to succeed.
+  private static final byte[] TEST_TABLE2 = Bytes.toBytes("table2");
+  private static final byte[] TEST_FAMILY2 = Bytes.toBytes("fam2");
+
+  /**
+   * Registers {@link BuggyMasterObserver} as the master coprocessor and then
+   * calls {@code startMiniCluster(2)}. hbase.coprocessor.abortonerror is not
+   * set here.
+   */
+  @BeforeClass
+  public static void setupBeforeClass() throws Exception {
+    final String observerClass = BuggyMasterObserver.class.getName();
+    UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY, observerClass);
+    UTIL.startMiniCluster(2);
+  }
+
+  /** Shuts down the mini cluster once all tests in this class have run. */
+  @AfterClass
+  public static void teardownAfterClass() throws Exception {
+    UTIL.shutdownMiniCluster();
+  }
+
+  /**
+   * Creates a table while {@link BuggyMasterObserver} is loaded and checks
+   * that the client receives a DoNotRetryIOException, that the master's
+   * /hbase/master znode is not deleted (i.e. the master did not abort), and
+   * that a second table can be created afterwards.
+   *
+   * @throws IOException if cluster or ZooKeeper access fails outside the
+   *     calls whose failure this test checks explicitly
+   */
+  @Test(timeout=30000)
+  public void testExceptionFromCoprocessorWhenCreatingTable()
+      throws IOException {
+    MiniHBaseCluster cluster = UTIL.getHBaseCluster();
+
+    HMaster master = cluster.getMaster();
+    MasterCoprocessorHost host = master.getCoprocessorHost();
+    BuggyMasterObserver cp = (BuggyMasterObserver)host.findCoprocessor(
+        BuggyMasterObserver.class.getName());
+    assertFalse("No table created yet", cp.wasCreateTableCalled());
+
+    // Set a watch on the zookeeper /hbase/master node. If the master dies,
+    // the node will be deleted.
+    // Master should *NOT* die: we are testing that the default setting of
+    // hbase.coprocessor.abortonerror (=false) is respected.
+    ZooKeeperWatcher zkw = new ZooKeeperWatcher(UTIL.getConfiguration(),
+        "unittest", throwingAbortable("Fatal ZK error: "));
+    try {
+      MasterTracker masterTracker = new MasterTracker(zkw, "/hbase/master",
+          throwingAbortable("Fatal Zookeeper tracker error, why="));
+      masterTracker.start();
+      zkw.registerListener(masterTracker);
+
+      // Check the set of loaded coprocessors reported by the master.
+      // In this test, there is only a single coprocessor (BuggyMasterObserver).
+      String coprocessorName = BuggyMasterObserver.class.getName();
+      assertEquals("[" + coprocessorName + "]", master.getLoadedCoprocessors());
+
+      HTableDescriptor htd1 = new HTableDescriptor(TEST_TABLE1);
+      htd1.addFamily(new HColumnDescriptor(TEST_FAMILY1));
+
+      // The assertion lives in the catch block (not in a finally block) so
+      // that an unexpected runtime failure is reported as itself instead of
+      // being replaced by a bare assertion error.
+      try {
+        HBaseAdmin admin = UTIL.getHBaseAdmin();
+        admin.createTable(htd1);
+        fail("Expected createTable() to fail with a DoNotRetryIOException.");
+      } catch (IOException e) {
+        assertEquals("Unexpected exception from createTable(): " + e,
+            "org.apache.hadoop.hbase.DoNotRetryIOException",
+            e.getClass().getName());
+      }
+
+      // wait for a few seconds to make sure that the Master hasn't aborted.
+      try {
+        Thread.sleep(3000);
+      } catch (InterruptedException e) {
+        // Restore the interrupt status before failing the test.
+        Thread.currentThread().interrupt();
+        fail("InterruptedException while sleeping.");
+      }
+
+      assertFalse("Master aborted on coprocessor NPE but was expected to survive.",
+          masterTracker.masterZKNodeWasDeleted);
+
+      // NOTE(review): the buggy coprocessor is still expected in this list
+      // even after its failure; presumably getLoadedCoprocessors() reports
+      // every coprocessor that was ever loaded -- confirm against HMaster.
+      assertEquals("[" + coprocessorName + "]", master.getLoadedCoprocessors());
+
+      // Verify that BuggyMasterObserver has been removed due to its misbehavior
+      // by creating another table: should not have a problem this time.
+      HTableDescriptor htd2 = new HTableDescriptor(TEST_TABLE2);
+      htd2.addFamily(new HColumnDescriptor(TEST_FAMILY2));
+      HBaseAdmin admin = UTIL.getHBaseAdmin();
+      try {
+        admin.createTable(htd2);
+      } catch (IOException e) {
+        fail("Failed to create table after buggy coprocessor removal: " + e);
+      }
+    } finally {
+      // Release the ZooKeeper connection opened for this test.
+      zkw.close();
+    }
+  }
+
+  /**
+   * Builds an Abortable that never reports itself as aborted and turns every
+   * abort request into a RuntimeException.
+   *
+   * @param messagePrefix text placed in front of the abort reason
+   * @return an Abortable whose abort() throws a RuntimeException carrying
+   *     {@code messagePrefix + why} and the original cause
+   */
+  private static Abortable throwingAbortable(final String messagePrefix) {
+    return new Abortable() {
+      @Override
+      public void abort(String why, Throwable e) {
+        throw new RuntimeException(messagePrefix + why, e);
+      }
+      @Override
+      public boolean isAborted() {
+        return false;
+      }
+    };
+  }
+}

Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionServerCoprocessorExceptionWithAbort.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionServerCoprocessorExceptionWithAbort.java?rev=1175170&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionServerCoprocessorExceptionWithAbort.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionServerCoprocessorExceptionWithAbort.java Sat Sep 24 14:11:05 2011
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.coprocessor;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.*;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.JVMClusterUtil;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperNodeTracker;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+/**
+ * Tests unhandled exceptions thrown by coprocessors running on a regionserver.
+ * Expected result is that the regionserver will abort with an informative
+ * error message describing the set of its loaded coprocessors for crash
+ * diagnosis. (HBASE-4014).
+ */
+public class TestRegionServerCoprocessorExceptionWithAbort {
+  // Shared mini-cluster harness used by every method in this class.
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+
+  /**
+   * Registers {@link BuggyRegionObserver} as the region coprocessor, sets
+   * hbase.coprocessor.abortonerror to "true" and then calls
+   * {@code startMiniCluster(2)}.
+   */
+  @BeforeClass
+  public static void setupBeforeClass() throws Exception {
+    final Configuration clusterConf = TEST_UTIL.getConfiguration();
+    final String observerClass = BuggyRegionObserver.class.getName();
+    // Name the coprocessor class that should be loaded.
+    clusterConf.set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, observerClass);
+    clusterConf.set("hbase.coprocessor.abortonerror", "true");
+    TEST_UTIL.startMiniCluster(2);
+  }
+
+  /** Shuts down the mini cluster once all tests in this class have run. */
+  @AfterClass
+  public static void teardownAfterClass() throws Exception {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  /**
+   * Writes one row to a table observed by {@link BuggyRegionObserver} and
+   * checks that the regionserver hosting the table's first region aborts.
+   *
+   * The JUnit timeout is 60 seconds because the abort poll below may by
+   * itself take up to 30 seconds, on top of creating and splitting the table.
+   *
+   * @throws IOException if an HBase call other than the checked put() fails
+   */
+  @Test(timeout=60000)
+  public void testExceptionFromCoprocessorDuringPut()
+      throws IOException {
+    // When we try to write to TEST_TABLE, the buggy coprocessor will
+    // cause a NullPointerException, which will cause the regionserver (which
+    // hosts the region we attempted to write to) to abort.
+    byte[] TEST_TABLE = Bytes.toBytes("observed_table");
+    byte[] TEST_FAMILY = Bytes.toBytes("aaa");
+
+    HTable table = TEST_UTIL.createTable(TEST_TABLE, TEST_FAMILY);
+    TEST_UTIL.createMultiRegions(table, TEST_FAMILY);
+
+    // Note which regionServer will abort (after put is attempted).
+    HRegionServer regionServer =
+        TEST_UTIL.getRSForFirstRegionInTable(TEST_TABLE);
+    try {
+      final byte[] ROW = Bytes.toBytes("bbb");
+      Put put = new Put(ROW);
+      put.add(TEST_FAMILY, ROW, ROW);
+      table.put(put);
+    } catch (IOException e) {
+      fail("put() failed: " + e);
+    }
+
+    // Poll once per second, for up to 30 seconds, until the regionserver
+    // reports that it has aborted. The state is re-read after every sleep so
+    // that the final wait is never wasted.
+    boolean regionServerAborted = regionServer.isAborted();
+    for (int i = 0; i < 30 && !regionServerAborted; i++) {
+      try {
+        Thread.sleep(1000);
+      } catch (InterruptedException e) {
+        // Restore the interrupt status before failing the test.
+        Thread.currentThread().interrupt();
+        fail("InterruptedException while waiting for regionserver to abort.");
+      }
+      regionServerAborted = regionServer.isAborted();
+    }
+    assertTrue("RegionServer did not abort after the coprocessor exception.",
+        regionServerAborted);
+  }
+
+  /**
+   * Region observer whose prePut() hook fails with a NullPointerException
+   * when the region belongs to the table named "observed_table"; for any
+   * other table this override does nothing.
+   */
+  public static class BuggyRegionObserver extends SimpleRegionObserver {
+    @Override
+    public void prePut(final ObserverContext<RegionCoprocessorEnvironment> c,
+        final Map<byte[], List<KeyValue>> familyMap,
+        final boolean writeToWAL) {
+      final String observedName =
+          c.getEnvironment().getRegion().getRegionInfo().getTableNameAsString();
+      if (!observedName.equals("observed_table")) {
+        return;
+      }
+      // Unboxing the null Integer raises the NullPointerException on purpose.
+      Integer missing = null;
+      missing = missing + 1;
+    }
+  }
+}

Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionServerCoprocessorExceptionWithRemove.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionServerCoprocessorExceptionWithRemove.java?rev=1175170&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionServerCoprocessorExceptionWithRemove.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionServerCoprocessorExceptionWithRemove.java Sat Sep 24 14:11:05 2011
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.coprocessor;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.*;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.JVMClusterUtil;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperNodeTracker;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+/**
+ * Tests unhandled exceptions thrown by coprocessors running on regionserver.
+ * Expected result is that the regionserver will remove the buggy coprocessor from
+ * its set of coprocessors and throw an org.apache.hadoop.hbase.DoNotRetryIOException
+ * back to the client.
+ * (HBASE-4014).
+ */
+public class TestRegionServerCoprocessorExceptionWithRemove {
+  /**
+   * Region observer whose prePut() hook fails with a NullPointerException
+   * when the region belongs to the table named "observed_table"; for any
+   * other table this override does nothing.
+   */
+  public static class BuggyRegionObserver extends SimpleRegionObserver {
+    @Override
+    public void prePut(final ObserverContext<RegionCoprocessorEnvironment> c,
+        final Map<byte[], List<KeyValue>> familyMap,
+        final boolean writeToWAL) {
+      final boolean onObservedTable = c.getEnvironment().getRegion()
+          .getRegionInfo().getTableNameAsString().equals("observed_table");
+      if (onObservedTable) {
+        // Adding to a null Integer forces an unboxing NullPointerException.
+        Integer unset = null;
+        unset = unset + 1;
+      }
+    }
+  }
+
+  // Shared mini-cluster harness used by every method in this class.
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+
+  /**
+   * Registers {@link BuggyRegionObserver} as the region coprocessor and then
+   * calls {@code startMiniCluster(2)}. hbase.coprocessor.abortonerror is not
+   * set here.
+   */
+  @BeforeClass
+  public static void setupBeforeClass() throws Exception {
+    // Name the coprocessor class that should be loaded.
+    final String observerClass = BuggyRegionObserver.class.getName();
+    TEST_UTIL.getConfiguration().set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, observerClass);
+    TEST_UTIL.startMiniCluster(2);
+  }
+
+  /** Shuts down the mini cluster once all tests in this class have run. */
+  @AfterClass
+  public static void teardownAfterClass() throws Exception {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  /**
+   * Writes one row to a table observed by {@link BuggyRegionObserver} and
+   * checks that the put fails with a RetriesExhaustedWithDetailsException
+   * whose message mentions DoNotRetryIOException, while the regionserver
+   * hosting the table's first region does not abort.
+   *
+   * @throws IOException if an HBase call fails in a way this test does not
+   *     check explicitly
+   */
+  @Test(timeout=30000)
+  public void testExceptionFromCoprocessorDuringPut()
+      throws IOException {
+    // When we try to write to TEST_TABLE, the buggy coprocessor will cause a
+    // NullPointerException in prePut(). This class does not set
+    // hbase.coprocessor.abortonerror, so the regionserver is expected to stay
+    // up and the client is expected to see the failure instead.
+
+    byte[] TEST_TABLE = Bytes.toBytes("observed_table");
+    byte[] TEST_FAMILY = Bytes.toBytes("aaa");
+
+    HTable table = TEST_UTIL.createTable(TEST_TABLE, TEST_FAMILY);
+    TEST_UTIL.createMultiRegions(table, TEST_FAMILY);
+    // Note which regionServer should survive the buggy coprocessor's
+    // prePut().
+    HRegionServer regionServer =
+        TEST_UTIL.getRSForFirstRegionInTable(TEST_TABLE);
+
+    // same logic as {@link TestMasterCoprocessorExceptionWithRemove},
+    // but exception will be RetriesExhaustedWithDetailsException rather
+    // than DoNotRetryIOException. The latter exception is what the RegionServer
+    // will have actually thrown, but the client will wrap this in a
+    // RetriesExhaustedWithDetailsException.
+    // We will verify that "DoNotRetryIOException" appears in the text of
+    // the exception's detailMessage.
+    //
+    // The check lives in the catch block (not in a finally block) so that any
+    // other failure is reported as itself instead of being replaced by a bare
+    // assertion error.
+    try {
+      final byte[] ROW = Bytes.toBytes("bbb");
+      Put put = new Put(ROW);
+      put.add(TEST_FAMILY, ROW, ROW);
+      table.put(put);
+      fail("Expected put() to fail with RetriesExhaustedWithDetailsException.");
+    } catch (RetriesExhaustedWithDetailsException e) {
+      // below, could call instead :
+      // startsWith("Failed 1 action: DoNotRetryIOException.")
+      // But that might be too brittle if client-side
+      // DoNotRetryIOException-handler changes its message.
+      assertTrue("Expected DoNotRetryIOException in message but got: " + e.getMessage(),
+          e.getMessage().contains("DoNotRetryIOException"));
+    }
+
+    // Watch the regionserver for 3 seconds: expected result is that it will
+    // survive and not abort. It is checked before every sleep and once more
+    // after the last one.
+    for (int i = 0; i < 3; i++) {
+      assertFalse("RegionServer aborted but was expected to survive.",
+          regionServer.isAborted());
+      try {
+        Thread.sleep(1000);
+      } catch (InterruptedException e) {
+        // Restore the interrupt status before failing the test.
+        Thread.currentThread().interrupt();
+        fail("InterruptedException while checking that the regionserver " +
+            "stays up.");
+      }
+    }
+    assertFalse("RegionServer aborted but was expected to survive.",
+        regionServer.isAborted());
+  }
+}



Mime
View raw message