hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From t...@apache.org
Subject svn commit: r954705 - in /hbase/trunk: ./ src/main/java/org/apache/hadoop/hbase/regionserver/ src/test/java/org/apache/hadoop/hbase/ src/test/java/org/apache/hadoop/hbase/master/
Date Tue, 15 Jun 2010 05:04:42 GMT
Author: todd
Date: Tue Jun 15 05:04:42 2010
New Revision: 954705

URL: http://svn.apache.org/viewvc?rev=954705&view=rev
Log:
HBASE-2726. Region Server should never abort without an informative log message

Modified:
    hbase/trunk/CHANGES.txt
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/LogRoller.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java

Modified: hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=954705&r1=954704&r2=954705&view=diff
==============================================================================
--- hbase/trunk/CHANGES.txt (original)
+++ hbase/trunk/CHANGES.txt Tue Jun 15 05:04:42 2010
@@ -694,6 +694,8 @@ Release 0.21.0 - Unreleased
    HBASE-2468  Improvements to prewarm META cache on clients
                (Mingjie Lai via Stack)
    HBASE-2353  Batch puts should sync HLog as few times as possible
+   HBASE-2726  Region Server should never abort without an informative log
+               message
 
   NEW FEATURES
    HBASE-1961  HBase EC2 scripts

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=954705&r1=954704&r2=954705&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Tue Jun 15 05:04:42 2010
@@ -377,7 +377,7 @@ public class HRegionServer implements HR
       if (restart) {
         restart();
       } else {
-        abort();
+        abort("ZooKeeper session expired");
       }
     } else if (type == EventType.NodeDeleted) {
       watchMasterAddress();
@@ -397,8 +397,7 @@ public class HRegionServer implements HR
   }
 
   private void restart() {
-    LOG.info("Restarting Region Server");
-    abort();
+    abort("Restarting region server");
     Threads.shutdown(regionServerThread);
     boolean done = false;
     while (!done) {
@@ -568,8 +567,7 @@ public class HRegionServer implements HR
       } // for
     } catch (Throwable t) {
       if (!checkOOME(t)) {
-        LOG.fatal("Unhandled exception. Aborting...", t);
-        abort();
+        abort("Unhandled exception", t);
       }
     }
     this.leases.closeAfterLeasesExpire();
@@ -836,8 +834,7 @@ public class HRegionServer implements HR
       (e.getCause() != null && e.getCause() instanceof OutOfMemoryError) ||
       (e.getMessage() != null &&
         e.getMessage().contains("java.lang.OutOfMemoryError"))) {
-      LOG.fatal("OutOfMemoryError, aborting.", e);
-      abort();
+      abort("OutOfMemoryError, aborting", e);
       stop = true;
     }
     return stop;
@@ -855,8 +852,7 @@ public class HRegionServer implements HR
       try {
         FSUtils.checkFileSystemAvailable(this.fs);
       } catch (IOException e) {
-        LOG.fatal("Shutting down HRegionServer: file system not available", e);
-        abort();
+        abort("File System not available", e);
         this.fsOk = false;
       }
     }
@@ -1008,8 +1004,7 @@ public class HRegionServer implements HR
     String n = Thread.currentThread().getName();
     UncaughtExceptionHandler handler = new UncaughtExceptionHandler() {
       public void uncaughtException(Thread t, Throwable e) {
-        abort();
-        LOG.fatal("Set stop flag in " + t.getName(), e);
+        abort("Uncaught exception in service thread " + t.getName(), e);
       }
     };
     Threads.setDaemonThreadRunning(this.hlogRoller, n + ".logRoller",
@@ -1132,8 +1127,15 @@ public class HRegionServer implements HR
    * log it is using and without notifying the master.
    * Used unit testing and on catastrophic events such as HDFS is yanked out
    * from under hbase or we OOME.
+   * @param reason the reason we are aborting
+   * @param cause the exception that caused the abort, or null
    */
-  public void abort() {
+  public void abort(String reason, Throwable cause) {
+    if (cause != null) {
+      LOG.fatal("Aborting region server " + this + ": " + reason, cause);
+    } else {
+      LOG.fatal("Aborting region server " + this + ": " + reason);
+    }
     this.abortRequested = true;
     this.reservedSpace.clear();
     if (this.metrics != null) {
@@ -1141,6 +1143,13 @@ public class HRegionServer implements HR
     }
     stop();
   }
+  
+  /**
+   * @see HRegionServer#abort(String, Throwable)
+   */
+  public void abort(String reason) {
+    abort(reason, null);
+  }
 
   /*
    * Simulate a kill -9 of this server.
@@ -1149,7 +1158,7 @@ public class HRegionServer implements HR
    */
   protected void kill() {
     this.killed = true;
-    abort();
+    abort("Simulated kill");
   }
 
   /**

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/LogRoller.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/LogRoller.java?rev=954705&r1=954704&r2=954705&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/LogRoller.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/LogRoller.java Tue Jun 15 05:04:42 2010
@@ -86,20 +86,21 @@ class LogRoller extends Thread implement
       } catch (FailedLogCloseException e) {
         LOG.fatal("Forcing server shutdown", e);
         server.checkFileSystem();
-        server.abort();
+        server.abort("Failed log close in log roller", e);
       } catch (java.net.ConnectException e) {
         LOG.fatal("Forcing server shutdown", e);
         server.checkFileSystem();
-        server.abort();
+        server.abort("Failed connect in log roller", e);
       } catch (IOException ex) {
         LOG.fatal("Log rolling failed with ioe: ",
           RemoteExceptionHandler.checkIOException(ex));
         server.checkFileSystem();
         // Abort if we get here.  We probably won't recover an IOE. HBASE-1132
-        server.abort();
+        server.abort("IOE in log roller", ex);
       } catch (Exception ex) {
         LOG.error("Log rolling failed", ex);
         server.checkFileSystem();
+        server.abort("Log rolling failed", ex);
       } finally {
         rollLog.set(false);
         rollLock.unlock();

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java?rev=954705&r1=954704&r2=954705&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java Tue Jun 15 05:04:42 2010
@@ -261,8 +261,7 @@ class MemStoreFlusher extends Thread imp
       // is required. Currently the only way to do this is a restart of
       // the server. Abort because hdfs is probably bad (HBASE-644 is a case
       // where hdfs was bad but passed the hdfs check).
-      LOG.fatal("Replay of hlog required. Forcing server shutdown", ex);
-      server.abort();
+      server.abort("Replay of HLog required. Forcing server shutdown", ex);
       return false;
     } catch (IOException ex) {
       LOG.error("Cache flush failed"

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java?rev=954705&r1=954704&r2=954705&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java Tue Jun 15 05:04:42 2010
@@ -274,7 +274,7 @@ public class MiniHBaseCluster {
   public String abortRegionServer(int serverNumber) {
     HRegionServer server = getRegionServer(serverNumber);
     LOG.info("Aborting " + server.toString());
-    server.abort();
+    server.abort("Aborting for tests", new Exception("Trace info"));
     return server.toString();
   }
 

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java?rev=954705&r1=954704&r2=954705&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java Tue Jun 15 05:04:42 2010
@@ -337,10 +337,10 @@ public class TestMasterTransitions {
       if (!incomingMsg.isType(HMsg.Type.MSG_REPORT_PROCESS_OPEN)) return true;
       // Save the region that is in transition so can test later it came back.
       this.regionToFind = incomingMsg.getRegionInfo();
-      LOG.info("ABORTING " + this.victim + " because got a " +
+      String msg = "ABORTING " + this.victim + " because got a " +
         HMsg.Type.MSG_REPORT_PROCESS_OPEN + " on this server for " +
-        incomingMsg.getRegionInfo().getRegionNameAsString());
-      this.victim.abort();
+        incomingMsg.getRegionInfo().getRegionNameAsString();
+      this.victim.abort(msg);
       this.abortSent = true;
       return true;
     }



Mime
View raw message