hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From j...@apache.org
Subject svn commit: r591880 - in /lucene/hadoop/trunk/src/contrib/hbase: CHANGES.txt src/test/org/apache/hadoop/hbase/TestRegionServerExit.java
Date Mon, 05 Nov 2007 05:06:37 GMT
Author: jimk
Date: Sun Nov  4 21:06:35 2007
New Revision: 591880

URL: http://svn.apache.org/viewvc?rev=591880&view=rev
Log:
HADOOP-2109 
- Fix another race condition in processing dead servers,
- Fix error when onlining meta regions: the region name, not the startKey, was being used as the key for map.put.
- Change TestRegionServerExit to always kill the region server for the META region. This makes
the test more deterministic and getting META reassigned was problematic.

Modified:
    lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt
    lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestRegionServerExit.java

Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt?rev=591880&r1=591879&r2=591880&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt Sun Nov  4 21:06:35 2007
@@ -22,7 +22,10 @@
    HADOOP-2137 hql.jsp : The character 0x19 is not valid
    HADOOP-2109 Fix another race condition in processing dead servers,
                Fix error online meta regions: was using region name and not
-               startKey as key for map.put
+               startKey as key for map.put. Change TestRegionServerExit to
+               always kill the region server for the META region. This makes
+               the test more deterministic and getting META reassigned was
+               problematic.
 
   IMPROVEMENTS
     HADOOP-2401 Add convenience put method that takes writable

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestRegionServerExit.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestRegionServerExit.java?rev=591880&r1=591879&r2=591880&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestRegionServerExit.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestRegionServerExit.java Sun Nov  4 21:06:35 2007
@@ -20,6 +20,8 @@
 package org.apache.hadoop.hbase;
 
 import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
 import java.util.TreeMap;
 
 import org.apache.commons.logging.Log;
@@ -55,9 +57,9 @@
     // Start up a new region server to take over serving of root and meta
     // after we shut down the current meta/root host.
     this.cluster.startRegionServer();
-    // Now abort the region server and wait for it to go down.
-    this.cluster.abortRegionServer(0);
-    LOG.info(this.cluster.waitOnRegionServer(0) + " has been aborted");
+    // Now abort the meta region server and wait for it to go down and come back
+    stopOrAbortMetaRegionServer(true);
+    // Verify that everything is back up.
     Thread t = startVerificationThread(tableName, row);
     t.start();
     threadDumpingJoin(t);
@@ -76,9 +78,9 @@
     // Start up a new region server to take over serving of root and meta
     // after we shut down the current meta/root host.
     this.cluster.startRegionServer();
-    // Now shutdown the region server and wait for it to go down.
-    this.cluster.stopRegionServer(0);
-    LOG.info(this.cluster.waitOnRegionServer(0) + " has been shutdown");
+    // Now shut down the meta region server and wait for it to go down and come back
+    stopOrAbortMetaRegionServer(false);
+    // Verify that everything is back up.
     Thread t = startVerificationThread(tableName, row);
     t.start();
     threadDumpingJoin(t);
@@ -98,6 +100,41 @@
     table.commit(lockid);
     return row;
   }
+
+  /*
+   * Stop the region server serving the meta region and wait for the meta region
+   * to get reassigned. This is always the most problematic case.
+   * 
+   * @param abort set to true if region server should be aborted, if false it
+   * is just shut down.
+   */
+  private void stopOrAbortMetaRegionServer(boolean abort) {
+    List<LocalHBaseCluster.RegionServerThread> regionThreads =
+      cluster.getRegionThreads();
+    
+    int server = -1;
+    for (int i = 0; i < regionThreads.size() && server == -1; i++) {
+      HRegionServer s = regionThreads.get(i).getRegionServer();
+      Collection<HRegion> regions = s.getOnlineRegions().values();
+      for (HRegion r : regions) {
+        if (r.getTableDesc().getName().equals(HConstants.META_TABLE_NAME)) {
+          server = i;
+        }
+      }
+    }
+    if (server == -1) {
+      LOG.fatal("could not find region server serving meta region");
+      fail();
+    }
+    if (abort) {
+      this.cluster.abortRegionServer(server);
+      
+    } else {
+      this.cluster.stopRegionServer(server);
+    }
+    LOG.info(this.cluster.waitOnRegionServer(server) + " has been " +
+        (abort ? "aborted" : "shut down"));
+  }
   
   /*
    * Run verification in a thread so I can concurrently run a thread-dumper
@@ -111,6 +148,18 @@
       final Text row) {
     Runnable runnable = new Runnable() {
       public void run() {
+        try {
+          // Now try to open a scanner on the meta table. Should stall until
+          // meta server comes back up.
+          HTable t = new HTable(conf, HConstants.META_TABLE_NAME);
+          HScannerInterface s =
+            t.obtainScanner(HConstants.COLUMN_FAMILY_ARRAY, new Text());
+          s.close();
+          
+        } catch (IOException e) {
+          LOG.fatal("could not re-open meta table because", e);
+          fail();
+        }
         HScannerInterface scanner = null;
         try {
           // Verify that the client can find the data after the region has moved



Mime
View raw message